github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/pdutil/pd.go (about) 1 // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 2 3 package pdutil 4 5 import ( 6 "bytes" 7 "context" 8 "crypto/tls" 9 "encoding/json" 10 "fmt" 11 "io" 12 "math" 13 "net/http" 14 "net/url" 15 "strings" 16 "time" 17 18 "github.com/coreos/go-semver/semver" 19 "github.com/docker/go-units" 20 "github.com/opentracing/opentracing-go" 21 "github.com/pingcap/errors" 22 "github.com/pingcap/failpoint" 23 "github.com/pingcap/log" 24 "github.com/pingcap/tidb/util/codec" 25 pd "github.com/tikv/pd/client" 26 pdapi "github.com/tikv/pd/server/api" 27 "go.uber.org/zap" 28 "google.golang.org/grpc" 29 30 berrors "github.com/pingcap/br/pkg/errors" 31 "github.com/pingcap/br/pkg/httputil" 32 "github.com/pingcap/br/pkg/lightning/common" 33 ) 34 35 const ( 36 clusterVersionPrefix = "pd/api/v1/config/cluster-version" 37 regionCountPrefix = "pd/api/v1/stats/region" 38 storePrefix = "pd/api/v1/store" 39 schedulerPrefix = "pd/api/v1/schedulers" 40 maxMsgSize = int(128 * units.MiB) // pd.ScanRegion may return a large response 41 scheduleConfigPrefix = "pd/api/v1/config/schedule" 42 pauseTimeout = 5 * time.Minute 43 44 // pd request retry time when connection fail 45 pdRequestRetryTime = 10 46 47 // set max-pending-peer-count to a large value to avoid scatter region failed. 48 maxPendingPeerUnlimited uint64 = math.MaxInt32 49 ) 50 51 // pauseConfigGenerator generate a config value according to store count and current value. 52 type pauseConfigGenerator func(int, interface{}) interface{} 53 54 // zeroPauseConfig sets the config to 0. 55 func zeroPauseConfig(int, interface{}) interface{} { 56 return 0 57 } 58 59 // pauseConfigMulStores multiplies the existing value by 60 // number of stores. The value is limited to 40, as larger value 61 // may make the cluster unstable. 62 func pauseConfigMulStores(stores int, raw interface{}) interface{} { 63 rawCfg := raw.(float64) 64 return math.Min(40, rawCfg*float64(stores)) 65 } 66 67 // pauseConfigFalse sets the config to "false". 68 func pauseConfigFalse(int, interface{}) interface{} { 69 return "false" 70 } 71 72 // constConfigGeneratorBuilder build a pauseConfigGenerator based on a given const value. 73 func constConfigGeneratorBuilder(val interface{}) pauseConfigGenerator { 74 return func(int, interface{}) interface{} { 75 return val 76 } 77 } 78 79 // ClusterConfig represents a set of scheduler whose config have been modified 80 // along with their original config. 81 type ClusterConfig struct { 82 // Enable PD schedulers before restore 83 Schedulers []string `json:"schedulers"` 84 // Original scheudle configuration 85 ScheduleCfg map[string]interface{} `json:"schedule_cfg"` 86 } 87 88 type pauseSchedulerBody struct { 89 Delay int64 `json:"delay"` 90 } 91 92 var ( 93 // in v4.0.8 version we can use pause configs 94 // see https://github.com/tikv/pd/pull/3088 95 pauseConfigVersion = semver.Version{Major: 4, Minor: 0, Patch: 8} 96 97 // Schedulers represent region/leader schedulers which can impact on performance. 98 Schedulers = map[string]struct{}{ 99 "balance-leader-scheduler": {}, 100 "balance-hot-region-scheduler": {}, 101 "balance-region-scheduler": {}, 102 103 "shuffle-leader-scheduler": {}, 104 "shuffle-region-scheduler": {}, 105 "shuffle-hot-region-scheduler": {}, 106 } 107 expectPDCfg = map[string]pauseConfigGenerator{ 108 "max-merge-region-keys": zeroPauseConfig, 109 "max-merge-region-size": zeroPauseConfig, 110 // TODO "leader-schedule-limit" and "region-schedule-limit" don't support ttl for now, 111 // but we still need set these config for compatible with old version. 112 // we need wait for https://github.com/tikv/pd/pull/3131 merged. 113 // see details https://github.com/pingcap/br/pull/592#discussion_r522684325 114 "leader-schedule-limit": pauseConfigMulStores, 115 "region-schedule-limit": pauseConfigMulStores, 116 "max-snapshot-count": pauseConfigMulStores, 117 "enable-location-replacement": pauseConfigFalse, 118 "max-pending-peer-count": constConfigGeneratorBuilder(maxPendingPeerUnlimited), 119 } 120 121 // defaultPDCfg find by https://github.com/tikv/pd/blob/master/conf/config.toml. 122 defaultPDCfg = map[string]interface{}{ 123 "max-merge-region-keys": 200000, 124 "max-merge-region-size": 20, 125 "leader-schedule-limit": 4, 126 "region-schedule-limit": 2048, 127 "enable-location-replacement": "true", 128 } 129 ) 130 131 // pdHTTPRequest defines the interface to send a request to pd and return the result in bytes. 132 type pdHTTPRequest func(context.Context, string, string, *http.Client, string, io.Reader) ([]byte, error) 133 134 // pdRequest is a func to send a HTTP to pd and return the result bytes. 135 func pdRequest( 136 ctx context.Context, 137 addr string, prefix string, 138 cli *http.Client, method string, body io.Reader) ([]byte, error) { 139 u, err := url.Parse(addr) 140 if err != nil { 141 return nil, errors.Trace(err) 142 } 143 reqURL := fmt.Sprintf("%s/%s", u, prefix) 144 req, err := http.NewRequestWithContext(ctx, method, reqURL, body) 145 if err != nil { 146 return nil, errors.Trace(err) 147 } 148 resp, err := cli.Do(req) 149 if err != nil { 150 return nil, errors.Trace(err) 151 } 152 count := 0 153 for { 154 count++ 155 if count > pdRequestRetryTime || resp.StatusCode < 500 { 156 break 157 } 158 resp.Body.Close() 159 time.Sleep(time.Second) 160 resp, err = cli.Do(req) 161 if err != nil { 162 return nil, errors.Trace(err) 163 } 164 } 165 defer resp.Body.Close() 166 if resp.StatusCode != http.StatusOK { 167 res, _ := io.ReadAll(resp.Body) 168 return nil, errors.Annotatef(berrors.ErrPDInvalidResponse, "[%d] %s %s", resp.StatusCode, res, reqURL) 169 } 170 171 r, err := io.ReadAll(resp.Body) 172 if err != nil { 173 return nil, errors.Trace(err) 174 } 175 return r, nil 176 } 177 178 // PdController manage get/update config from pd. 179 type PdController struct { 180 addrs []string 181 cli *http.Client 182 pdClient pd.Client 183 version *semver.Version 184 185 // control the pause schedulers goroutine 186 schedulerPauseCh chan struct{} 187 } 188 189 // NewPdController creates a new PdController. 190 func NewPdController( 191 ctx context.Context, 192 pdAddrs string, 193 tlsConf *tls.Config, 194 securityOption pd.SecurityOption, 195 ) (*PdController, error) { 196 cli := httputil.NewClient(tlsConf) 197 198 addrs := strings.Split(pdAddrs, ",") 199 processedAddrs := make([]string, 0, len(addrs)) 200 var failure error 201 var versionBytes []byte 202 for _, addr := range addrs { 203 if !strings.HasPrefix(addr, "http") { 204 if tlsConf != nil { 205 addr = "https://" + addr 206 } else { 207 addr = "http://" + addr 208 } 209 } 210 processedAddrs = append(processedAddrs, addr) 211 versionBytes, failure = pdRequest(ctx, addr, clusterVersionPrefix, cli, http.MethodGet, nil) 212 if failure == nil { 213 break 214 } 215 } 216 if failure != nil { 217 return nil, errors.Annotatef(berrors.ErrPDUpdateFailed, "pd address (%s) not available, please check network", pdAddrs) 218 } 219 220 version := parseVersion(versionBytes) 221 maxCallMsgSize := []grpc.DialOption{ 222 grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)), 223 grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(maxMsgSize)), 224 } 225 pdClient, err := pd.NewClientWithContext( 226 ctx, addrs, securityOption, 227 pd.WithGRPCDialOptions(maxCallMsgSize...), 228 pd.WithCustomTimeoutOption(10*time.Second), 229 ) 230 if err != nil { 231 log.Error("fail to create pd client", zap.Error(err)) 232 return nil, errors.Trace(err) 233 } 234 235 return &PdController{ 236 addrs: processedAddrs, 237 cli: cli, 238 pdClient: pdClient, 239 version: version, 240 // We should make a buffered channel here otherwise when context canceled, 241 // gracefully shutdown will stick at resuming schedulers. 242 schedulerPauseCh: make(chan struct{}, 1), 243 }, nil 244 } 245 246 func parseVersion(versionBytes []byte) *semver.Version { 247 // we need trim space or semver will parse failed 248 v := strings.TrimSpace(string(versionBytes)) 249 v = strings.Trim(v, "\"") 250 v = strings.TrimPrefix(v, "v") 251 version, err := semver.NewVersion(v) 252 if err != nil { 253 log.Warn("fail back to v0.0.0 version", 254 zap.ByteString("version", versionBytes), zap.Error(err)) 255 version = &semver.Version{Major: 0, Minor: 0, Patch: 0} 256 } 257 failpoint.Inject("PDEnabledPauseConfig", func(val failpoint.Value) { 258 if val.(bool) { 259 // test pause config is enable 260 version = &semver.Version{Major: 5, Minor: 0, Patch: 0} 261 } 262 }) 263 return version 264 } 265 266 func (p *PdController) isPauseConfigEnabled() bool { 267 return p.version.Compare(pauseConfigVersion) >= 0 268 } 269 270 // SetHTTP set pd addrs and cli for test. 271 func (p *PdController) SetHTTP(addrs []string, cli *http.Client) { 272 p.addrs = addrs 273 p.cli = cli 274 } 275 276 // SetPDClient set pd addrs and cli for test. 277 func (p *PdController) SetPDClient(pdClient pd.Client) { 278 p.pdClient = pdClient 279 } 280 281 // GetPDClient set pd addrs and cli for test. 282 func (p *PdController) GetPDClient() pd.Client { 283 return p.pdClient 284 } 285 286 // GetClusterVersion returns the current cluster version. 287 func (p *PdController) GetClusterVersion(ctx context.Context) (string, error) { 288 return p.getClusterVersionWith(ctx, pdRequest) 289 } 290 291 func (p *PdController) getClusterVersionWith(ctx context.Context, get pdHTTPRequest) (string, error) { 292 var err error 293 for _, addr := range p.addrs { 294 v, e := get(ctx, addr, clusterVersionPrefix, p.cli, http.MethodGet, nil) 295 if e != nil { 296 err = e 297 continue 298 } 299 return string(v), nil 300 } 301 302 return "", errors.Trace(err) 303 } 304 305 // GetRegionCount returns the region count in the specified range. 306 func (p *PdController) GetRegionCount(ctx context.Context, startKey, endKey []byte) (int, error) { 307 return p.getRegionCountWith(ctx, pdRequest, startKey, endKey) 308 } 309 310 func (p *PdController) getRegionCountWith( 311 ctx context.Context, get pdHTTPRequest, startKey, endKey []byte, 312 ) (int, error) { 313 // TiKV reports region start/end keys to PD in memcomparable-format. 314 var start, end string 315 start = url.QueryEscape(string(codec.EncodeBytes(nil, startKey))) 316 if len(endKey) != 0 { // Empty end key means the max. 317 end = url.QueryEscape(string(codec.EncodeBytes(nil, endKey))) 318 } 319 var err error 320 for _, addr := range p.addrs { 321 query := fmt.Sprintf( 322 "%s?start_key=%s&end_key=%s", 323 regionCountPrefix, start, end) 324 v, e := get(ctx, addr, query, p.cli, http.MethodGet, nil) 325 if e != nil { 326 err = e 327 continue 328 } 329 regionsMap := make(map[string]interface{}) 330 err = json.Unmarshal(v, ®ionsMap) 331 if err != nil { 332 return 0, errors.Trace(err) 333 } 334 return int(regionsMap["count"].(float64)), nil 335 } 336 return 0, errors.Trace(err) 337 } 338 339 // GetStoreInfo returns the info of store with the specified id. 340 func (p *PdController) GetStoreInfo(ctx context.Context, storeID uint64) (*pdapi.StoreInfo, error) { 341 return p.getStoreInfoWith(ctx, pdRequest, storeID) 342 } 343 344 func (p *PdController) getStoreInfoWith( 345 ctx context.Context, get pdHTTPRequest, storeID uint64) (*pdapi.StoreInfo, error) { 346 var err error 347 for _, addr := range p.addrs { 348 query := fmt.Sprintf( 349 "%s/%d", 350 storePrefix, storeID) 351 v, e := get(ctx, addr, query, p.cli, http.MethodGet, nil) 352 if e != nil { 353 err = e 354 continue 355 } 356 store := pdapi.StoreInfo{} 357 err = json.Unmarshal(v, &store) 358 if err != nil { 359 return nil, errors.Trace(err) 360 } 361 return &store, nil 362 } 363 return nil, errors.Trace(err) 364 } 365 366 func (p *PdController) doPauseSchedulers(ctx context.Context, schedulers []string, post pdHTTPRequest) ([]string, error) { 367 // pause this scheduler with 300 seconds 368 body, err := json.Marshal(pauseSchedulerBody{Delay: int64(pauseTimeout)}) 369 if err != nil { 370 return nil, errors.Trace(err) 371 } 372 // PauseSchedulers remove pd scheduler temporarily. 373 removedSchedulers := make([]string, 0, len(schedulers)) 374 for _, scheduler := range schedulers { 375 prefix := fmt.Sprintf("%s/%s", schedulerPrefix, scheduler) 376 for _, addr := range p.addrs { 377 _, err = post(ctx, addr, prefix, p.cli, http.MethodPost, bytes.NewBuffer(body)) 378 if err == nil { 379 removedSchedulers = append(removedSchedulers, scheduler) 380 break 381 } 382 } 383 if err != nil { 384 return removedSchedulers, errors.Trace(err) 385 } 386 } 387 return removedSchedulers, nil 388 } 389 390 func (p *PdController) pauseSchedulersAndConfigWith( 391 ctx context.Context, schedulers []string, 392 schedulerCfg map[string]interface{}, post pdHTTPRequest, 393 ) ([]string, error) { 394 // first pause this scheduler, if the first time failed. we should return the error 395 // so put first time out of for loop. and in for loop we could ignore other failed pause. 396 removedSchedulers, err := p.doPauseSchedulers(ctx, schedulers, post) 397 if err != nil { 398 log.Error("failed to pause scheduler at beginning", 399 zap.Strings("name", schedulers), zap.Error(err)) 400 return nil, errors.Trace(err) 401 } 402 log.Info("pause scheduler successful at beginning", zap.Strings("name", schedulers)) 403 if schedulerCfg != nil { 404 err = p.doPauseConfigs(ctx, schedulerCfg, post) 405 if err != nil { 406 log.Error("failed to pause config at beginning", 407 zap.Any("cfg", schedulerCfg), zap.Error(err)) 408 return nil, errors.Trace(err) 409 } 410 log.Info("pause configs successful at beginning", zap.Any("cfg", schedulerCfg)) 411 } 412 413 go func() { 414 tick := time.NewTicker(pauseTimeout / 3) 415 defer tick.Stop() 416 417 for { 418 select { 419 case <-ctx.Done(): 420 return 421 case <-tick.C: 422 _, err := p.doPauseSchedulers(ctx, schedulers, post) 423 if err != nil { 424 log.Warn("pause scheduler failed, ignore it and wait next time pause", zap.Error(err)) 425 } 426 if schedulerCfg != nil { 427 err = p.doPauseConfigs(ctx, schedulerCfg, post) 428 if err != nil { 429 log.Warn("pause configs failed, ignore it and wait next time pause", zap.Error(err)) 430 } 431 } 432 log.Info("pause scheduler(configs)", zap.Strings("name", removedSchedulers), 433 zap.Any("cfg", schedulerCfg)) 434 case <-p.schedulerPauseCh: 435 log.Info("exit pause scheduler and configs successful") 436 return 437 } 438 } 439 }() 440 return removedSchedulers, nil 441 } 442 443 // ResumeSchedulers resume pd scheduler. 444 func (p *PdController) ResumeSchedulers(ctx context.Context, schedulers []string) error { 445 return p.resumeSchedulerWith(ctx, schedulers, pdRequest) 446 } 447 448 func (p *PdController) resumeSchedulerWith(ctx context.Context, schedulers []string, post pdHTTPRequest) (err error) { 449 log.Info("resume scheduler", zap.Strings("schedulers", schedulers)) 450 p.schedulerPauseCh <- struct{}{} 451 452 // 0 means stop pause. 453 body, err := json.Marshal(pauseSchedulerBody{Delay: 0}) 454 if err != nil { 455 return errors.Trace(err) 456 } 457 for _, scheduler := range schedulers { 458 prefix := fmt.Sprintf("%s/%s", schedulerPrefix, scheduler) 459 for _, addr := range p.addrs { 460 _, err = post(ctx, addr, prefix, p.cli, http.MethodPost, bytes.NewBuffer(body)) 461 if err == nil { 462 break 463 } 464 } 465 if err != nil { 466 log.Error("failed to resume scheduler after retry, you may reset this scheduler manually"+ 467 "or just wait this scheduler pause timeout", zap.String("scheduler", scheduler)) 468 } else { 469 log.Info("resume scheduler successful", zap.String("scheduler", scheduler)) 470 } 471 } 472 // no need to return error, because the pause will timeout. 473 return nil 474 } 475 476 // ListSchedulers list all pd scheduler. 477 func (p *PdController) ListSchedulers(ctx context.Context) ([]string, error) { 478 return p.listSchedulersWith(ctx, pdRequest) 479 } 480 481 func (p *PdController) listSchedulersWith(ctx context.Context, get pdHTTPRequest) ([]string, error) { 482 var err error 483 for _, addr := range p.addrs { 484 v, e := get(ctx, addr, schedulerPrefix, p.cli, http.MethodGet, nil) 485 if e != nil { 486 err = e 487 continue 488 } 489 d := make([]string, 0) 490 err = json.Unmarshal(v, &d) 491 if err != nil { 492 return nil, errors.Trace(err) 493 } 494 return d, nil 495 } 496 return nil, errors.Trace(err) 497 } 498 499 // GetPDScheduleConfig returns PD schedule config value associated with the key. 500 // It returns nil if there is no such config item. 501 func (p *PdController) GetPDScheduleConfig( 502 ctx context.Context, 503 ) (map[string]interface{}, error) { 504 var err error 505 for _, addr := range p.addrs { 506 v, e := pdRequest( 507 ctx, addr, scheduleConfigPrefix, p.cli, http.MethodGet, nil) 508 if e != nil { 509 err = e 510 continue 511 } 512 cfg := make(map[string]interface{}) 513 err = json.Unmarshal(v, &cfg) 514 if err != nil { 515 return nil, errors.Trace(err) 516 } 517 return cfg, nil 518 } 519 return nil, errors.Trace(err) 520 } 521 522 // UpdatePDScheduleConfig updates PD schedule config value associated with the key. 523 func (p *PdController) UpdatePDScheduleConfig(ctx context.Context) error { 524 log.Info("update pd with default config", zap.Any("cfg", defaultPDCfg)) 525 return p.doUpdatePDScheduleConfig(ctx, defaultPDCfg, pdRequest) 526 } 527 528 func (p *PdController) doUpdatePDScheduleConfig( 529 ctx context.Context, cfg map[string]interface{}, post pdHTTPRequest, prefixs ...string, 530 ) error { 531 prefix := scheduleConfigPrefix 532 if len(prefixs) != 0 { 533 prefix = prefixs[0] 534 } 535 for _, addr := range p.addrs { 536 reqData, err := json.Marshal(cfg) 537 if err != nil { 538 return errors.Trace(err) 539 } 540 _, e := post(ctx, addr, prefix, 541 p.cli, http.MethodPost, bytes.NewBuffer(reqData)) 542 if e == nil { 543 return nil 544 } 545 log.Warn("failed to update PD config, will try next", zap.Error(e), zap.String("pd", addr)) 546 } 547 return errors.Annotate(berrors.ErrPDUpdateFailed, "failed to update PD schedule config") 548 } 549 550 func (p *PdController) doPauseConfigs(ctx context.Context, cfg map[string]interface{}, post pdHTTPRequest) error { 551 // pause this scheduler with 300 seconds 552 prefix := fmt.Sprintf("%s?ttlSecond=%.0f", scheduleConfigPrefix, pauseTimeout.Seconds()) 553 return p.doUpdatePDScheduleConfig(ctx, cfg, post, prefix) 554 } 555 556 func restoreSchedulers(ctx context.Context, pd *PdController, clusterCfg ClusterConfig) error { 557 if err := pd.ResumeSchedulers(ctx, clusterCfg.Schedulers); err != nil { 558 return errors.Annotate(err, "fail to add PD schedulers") 559 } 560 log.Info("restoring config", zap.Any("config", clusterCfg.ScheduleCfg)) 561 mergeCfg := make(map[string]interface{}) 562 for cfgKey := range expectPDCfg { 563 value := clusterCfg.ScheduleCfg[cfgKey] 564 if value == nil { 565 // Ignore non-exist config. 566 continue 567 } 568 mergeCfg[cfgKey] = value 569 } 570 571 prefix := make([]string, 0, 1) 572 if pd.isPauseConfigEnabled() { 573 // set config's ttl to zero, make temporary config invalid immediately. 574 prefix = append(prefix, fmt.Sprintf("%s?ttlSecond=%d", scheduleConfigPrefix, 0)) 575 } 576 // reset config with previous value. 577 if err := pd.doUpdatePDScheduleConfig(ctx, mergeCfg, pdRequest, prefix...); err != nil { 578 return errors.Annotate(err, "fail to update PD merge config") 579 } 580 return nil 581 } 582 583 // MakeUndoFunctionByConfig return an UndoFunc based on specified ClusterConfig 584 func (p *PdController) MakeUndoFunctionByConfig(config ClusterConfig) UndoFunc { 585 restore := func(ctx context.Context) error { 586 return restoreSchedulers(ctx, p, config) 587 } 588 return restore 589 } 590 591 // RemoveSchedulers removes the schedulers that may slow down BR speed. 592 func (p *PdController) RemoveSchedulers(ctx context.Context) (undo UndoFunc, err error) { 593 undo = Nop 594 595 origin, _, err1 := p.RemoveSchedulersWithOrigin(ctx) 596 if err1 != nil { 597 err = err1 598 return 599 } 600 601 undo = p.MakeUndoFunctionByConfig(ClusterConfig{Schedulers: origin.Schedulers, ScheduleCfg: origin.ScheduleCfg}) 602 return undo, errors.Trace(err) 603 } 604 605 // RemoveSchedulersWithOrigin pause and remove br related schedule configs and return the origin and modified configs 606 func (p *PdController) RemoveSchedulersWithOrigin(ctx context.Context) (ClusterConfig, ClusterConfig, error) { 607 if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { 608 span1 := span.Tracer().StartSpan("PdController.RemoveSchedulers", opentracing.ChildOf(span.Context())) 609 defer span1.Finish() 610 ctx = opentracing.ContextWithSpan(ctx, span1) 611 } 612 613 originCfg := ClusterConfig{} 614 removedCfg := ClusterConfig{} 615 stores, err := p.pdClient.GetAllStores(ctx) 616 if err != nil { 617 return originCfg, removedCfg, err 618 } 619 scheduleCfg, err := p.GetPDScheduleConfig(ctx) 620 if err != nil { 621 return originCfg, removedCfg, err 622 } 623 disablePDCfg := make(map[string]interface{}, len(expectPDCfg)) 624 originPDCfg := make(map[string]interface{}, len(expectPDCfg)) 625 for cfgKey, cfgValFunc := range expectPDCfg { 626 value, ok := scheduleCfg[cfgKey] 627 if !ok { 628 // Ignore non-exist config. 629 continue 630 } 631 disablePDCfg[cfgKey] = cfgValFunc(len(stores), value) 632 originPDCfg[cfgKey] = value 633 } 634 originCfg.ScheduleCfg = originPDCfg 635 removedCfg.ScheduleCfg = disablePDCfg 636 637 log.Debug("saved PD config", zap.Any("config", scheduleCfg)) 638 639 // Remove default PD scheduler that may affect restore process. 640 existSchedulers, err := p.ListSchedulers(ctx) 641 if err != nil { 642 return originCfg, removedCfg, err 643 } 644 needRemoveSchedulers := make([]string, 0, len(existSchedulers)) 645 for _, s := range existSchedulers { 646 if _, ok := Schedulers[s]; ok { 647 needRemoveSchedulers = append(needRemoveSchedulers, s) 648 } 649 } 650 651 removedSchedulers, err := p.doRemoveSchedulersWith(ctx, needRemoveSchedulers, disablePDCfg) 652 if err != nil { 653 return originCfg, removedCfg, err 654 } 655 656 originCfg.Schedulers = removedSchedulers 657 removedCfg.Schedulers = removedSchedulers 658 659 return originCfg, removedCfg, nil 660 } 661 662 // RemoveSchedulersWithCfg removes pd schedulers and configs with specified ClusterConfig 663 func (p *PdController) RemoveSchedulersWithCfg(ctx context.Context, removeCfg ClusterConfig) error { 664 _, err := p.doRemoveSchedulersWith(ctx, removeCfg.Schedulers, removeCfg.ScheduleCfg) 665 return err 666 } 667 668 func (p *PdController) doRemoveSchedulersWith( 669 ctx context.Context, 670 needRemoveSchedulers []string, 671 disablePDCfg map[string]interface{}, 672 ) ([]string, error) { 673 var removedSchedulers []string 674 var err error 675 if p.isPauseConfigEnabled() { 676 // after 4.0.8 we can set these config with TTL 677 removedSchedulers, err = p.pauseSchedulersAndConfigWith(ctx, needRemoveSchedulers, disablePDCfg, pdRequest) 678 } else { 679 // adapt to earlier version (before 4.0.8) of pd cluster 680 // which doesn't have temporary config setting. 681 err = p.doUpdatePDScheduleConfig(ctx, disablePDCfg, pdRequest) 682 if err != nil { 683 return nil, err 684 } 685 removedSchedulers, err = p.pauseSchedulersAndConfigWith(ctx, needRemoveSchedulers, nil, pdRequest) 686 } 687 return removedSchedulers, err 688 } 689 690 // Close close the connection to pd. 691 func (p *PdController) Close() { 692 p.pdClient.Close() 693 close(p.schedulerPauseCh) 694 } 695 696 // FetchPDVersion get pd version 697 func FetchPDVersion(ctx context.Context, tls *common.TLS, pdAddr string) (*semver.Version, error) { 698 // An example of PD version API. 699 // curl http://pd_address/pd/api/v1/version 700 // { 701 // "version": "v4.0.0-rc.2-451-g760fb650" 702 // } 703 var rawVersion struct { 704 Version string `json:"version"` 705 } 706 err := tls.WithHost(pdAddr).GetJSON(ctx, "/pd/api/v1/version", &rawVersion) 707 if err != nil { 708 return nil, errors.Trace(err) 709 } 710 711 return parseVersion([]byte(rawVersion.Version)), nil 712 }