github.com/polarismesh/polaris@v1.17.8/bootstrap/server.go (about) 1 /** 2 * Tencent is pleased to support the open source community by making Polaris available. 3 * 4 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 5 * 6 * Licensed under the BSD 3-Clause License (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * https://opensource.org/licenses/BSD-3-Clause 11 * 12 * Unless required by applicable law or agreed to in writing, software distributed 13 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 14 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 15 * specific language governing permissions and limitations under the License. 16 */ 17 18 package bootstrap 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "net" 25 "strings" 26 "time" 27 28 "github.com/golang/protobuf/ptypes/wrappers" 29 apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage" 30 "go.uber.org/zap" 31 "gopkg.in/yaml.v2" 32 33 "github.com/polarismesh/polaris/admin" 34 "github.com/polarismesh/polaris/apiserver" 35 "github.com/polarismesh/polaris/auth" 36 boot_config "github.com/polarismesh/polaris/bootstrap/config" 37 "github.com/polarismesh/polaris/cache" 38 types "github.com/polarismesh/polaris/cache/api" 39 api "github.com/polarismesh/polaris/common/api/v1" 40 "github.com/polarismesh/polaris/common/eventhub" 41 "github.com/polarismesh/polaris/common/log" 42 "github.com/polarismesh/polaris/common/metrics" 43 "github.com/polarismesh/polaris/common/model" 44 "github.com/polarismesh/polaris/common/utils" 45 "github.com/polarismesh/polaris/common/version" 46 config_center "github.com/polarismesh/polaris/config" 47 "github.com/polarismesh/polaris/namespace" 48 "github.com/polarismesh/polaris/plugin" 49 "github.com/polarismesh/polaris/service" 50 "github.com/polarismesh/polaris/service/batch" 51 "github.com/polarismesh/polaris/service/healthcheck" 52 "github.com/polarismesh/polaris/store" 53 ) 54 55 var ( 56 SelfServiceInstance = make([]*apiservice.Instance, 0) 57 ConfigFilePath = "" 58 selfHeathChecker *SelfHeathChecker 59 ) 60 61 // Start 启动 62 func Start(configFilePath string) { 63 // 加载配置 64 ConfigFilePath = configFilePath 65 utils.ConfDir = parseConfDir(configFilePath) 66 cfg, err := boot_config.Load(configFilePath) 67 if err != nil { 68 fmt.Printf("[ERROR] load config fail\n") 69 return 70 } 71 72 c, err := yaml.Marshal(cfg) 73 if err != nil { 74 fmt.Printf("[ERROR] config yaml marshal fail\n") 75 return 76 } 77 fmt.Printf(string(c)) 78 79 // 初始化日志打印 80 err = log.Configure(cfg.Bootstrap.Logger) 81 if err != nil { 82 fmt.Printf("[ERROR] configure logger fail: %v\n", err) 83 return 84 } 85 86 // 初始化 87 ctx, cancel := context.WithCancel(context.Background()) 88 defer cancel() 89 90 // 获取本地IP地址 91 ctx, err = acquireLocalhost(ctx, &cfg.Bootstrap.PolarisService) 92 if err != nil { 93 fmt.Printf("[ERROR] acquire localhost fail: %v\n", err) 94 return 95 } 96 // 设置默认端口信息数据 97 acquireLocalPort(ctx, cfg.APIServers) 98 99 metrics.InitMetrics() 100 eventhub.InitEventHub() 101 102 // 设置插件配置 103 plugin.SetPluginConfig(&cfg.Plugin) 104 105 // 初始化存储层 106 store.SetStoreConfig(&cfg.Store) 107 var s store.Store 108 s, err = store.GetStore() 109 if err != nil { 110 fmt.Printf("[ERROR] get store fail: %v", err) 111 return 112 } 113 114 // 开启进入启动流程,初始化插件,加载数据等 115 var tx store.Transaction 116 tx, err = StartBootstrapInOrder(s, cfg) 117 if err != nil { 118 // 多次尝试加锁失败 119 fmt.Printf("[ERROR] bootstrap fail: %v\n", err) 120 return 121 } 122 err = StartComponents(ctx, cfg) 123 if err != nil { 124 fmt.Printf("[ERROR] start components fail: %v\n", err) 125 return 126 } 127 errCh := make(chan error, len(cfg.APIServers)) 128 servers, err := StartServers(ctx, cfg, errCh) 129 if err != nil { 130 fmt.Printf("[ERROR] start servers fail: %v\n", err) 131 return 132 } 133 134 if err := polarisServiceRegister(&cfg.Bootstrap.PolarisService, cfg.APIServers); err != nil { 135 fmt.Printf("[ERROR] register polaris service fail: %v\n", err) 136 return 137 } 138 _ = FinishBootstrapOrder(tx) // 启动完成,解锁 139 fmt.Println("finish starting server") 140 141 // 等待信号量 142 WaitSignal(servers, errCh) 143 fmt.Println("begin stop server") 144 } 145 146 // StartComponents start health check and naming components 147 func StartComponents(ctx context.Context, cfg *boot_config.Config) error { 148 var err error 149 150 // 获取存储层对象 151 s, err := store.GetStore() 152 if err != nil { 153 log.Errorf("[Naming][Server] can not get store, err: %s", err.Error()) 154 return errors.New("can not get store") 155 } 156 157 // 初始化缓存模块 158 if err := cache.Initialize(ctx, &cfg.Cache, s); err != nil { 159 return err 160 } 161 162 cacheMgn, err := cache.GetCacheManager() 163 if err != nil { 164 return err 165 } 166 167 // 初始化鉴权层 168 if err = auth.Initialize(ctx, &cfg.Auth, s, cacheMgn); err != nil { 169 return err 170 } 171 172 userMgn, err := auth.GetUserServer() 173 if err != nil { 174 return err 175 } 176 177 strategyMgn, err := auth.GetStrategyServer() 178 if err != nil { 179 return err 180 } 181 182 // 初始化命名空间模块 183 if err := namespace.Initialize(ctx, &cfg.Namespace, s, cacheMgn); err != nil { 184 return err 185 } 186 187 // 初始化服务发现模块相关功能 188 if err := StartDiscoverComponents(ctx, cfg, s, cacheMgn); err != nil { 189 return err 190 } 191 192 // 初始化配置中心模块相关功能 193 if err := StartConfigCenterComponents(ctx, cfg, s, cacheMgn, userMgn, strategyMgn); err != nil { 194 return err 195 } 196 197 namingSvr, err := service.GetServer() 198 if err != nil { 199 return err 200 } 201 healthCheckServer, err := healthcheck.GetServer() 202 if err != nil { 203 return err 204 } 205 206 // 初始化运维操作模块 207 if err := admin.Initialize(ctx, &cfg.Maintain, namingSvr, healthCheckServer, cacheMgn, s); err != nil { 208 return err 209 } 210 211 // 最后启动 cache 212 if err := cache.Run(cacheMgn, ctx); err != nil { 213 return err 214 } 215 216 return nil 217 } 218 219 func StartDiscoverComponents(ctx context.Context, cfg *boot_config.Config, s store.Store, 220 cacheMgn *cache.CacheManager) error { 221 // 批量控制器 222 namingBatchConfig, err := batch.ParseBatchConfig(cfg.Naming.Batch) 223 if err != nil { 224 return err 225 } 226 healthBatchConfig, err := batch.ParseBatchConfig(cfg.HealthChecks.Batch) 227 if err != nil { 228 return err 229 } 230 231 batchConfig := &batch.Config{ 232 Register: namingBatchConfig.Register, 233 Deregister: namingBatchConfig.Deregister, 234 ClientRegister: namingBatchConfig.ClientRegister, 235 ClientDeregister: namingBatchConfig.ClientDeregister, 236 Heartbeat: healthBatchConfig.Heartbeat, 237 } 238 239 bc, err := batch.NewBatchCtrlWithConfig(s, cacheMgn, batchConfig) 240 if err != nil { 241 log.Errorf("new batch ctrl with config err: %s", err.Error()) 242 return err 243 } 244 bc.Start(ctx) 245 246 if len(cfg.HealthChecks.LocalHost) == 0 { 247 cfg.HealthChecks.LocalHost = utils.LocalHost // 补充healthCheck的配置 248 } 249 if err = healthcheck.Initialize(ctx, &cfg.HealthChecks, cfg.Cache.Open, bc); err != nil { 250 return err 251 } 252 healthCheckServer, err := healthcheck.GetServer() 253 if err != nil { 254 return err 255 } 256 if cfg.HealthChecks.Open { 257 cacheProvider, err := healthCheckServer.CacheProvider() 258 if err != nil { 259 return err 260 } 261 healthCheckServer.SetServiceCache(cacheMgn.Service()) 262 healthCheckServer.SetInstanceCache(cacheMgn.Instance()) 263 // 为 instance 的 cache 添加 健康检查的 Listener 264 cacheMgn.AddListener(types.CacheInstance, []types.Listener{cacheProvider}) 265 cacheMgn.AddListener(types.CacheClient, []types.Listener{cacheProvider}) 266 } 267 268 namespaceSvr, err := namespace.GetServer() 269 if err != nil { 270 return err 271 } 272 273 opts := []service.InitOption{ 274 service.WithBatchController(bc), 275 service.WithStorage(s), 276 service.WithCacheManager(&cfg.Cache, cacheMgn), 277 service.WithHealthCheckSvr(healthCheckServer), 278 service.WithNamespaceSvr(namespaceSvr), 279 } 280 281 // 初始化服务模块 282 if err = service.Initialize(ctx, &cfg.Naming, opts...); err != nil { 283 return err 284 } 285 286 if _, err = service.GetServer(); err != nil { 287 return err 288 } 289 290 return nil 291 } 292 293 func parseConfDir(path string) string { 294 slashIndex := strings.LastIndex(path, "/") 295 if slashIndex == -1 { 296 return "./" 297 } 298 return path[0 : slashIndex+1] 299 } 300 301 // StartConfigCenterComponents 启动配置中心模块 302 func StartConfigCenterComponents(ctx context.Context, cfg *boot_config.Config, s store.Store, 303 cacheMgn *cache.CacheManager, userMgn auth.UserServer, strategyMgn auth.StrategyServer) error { 304 namespaceOperator, err := namespace.GetServer() 305 if err != nil { 306 return err 307 } 308 309 return config_center.Initialize(ctx, cfg.Config, s, cacheMgn, namespaceOperator, userMgn, strategyMgn) 310 } 311 312 // StartServers 启动server 313 func StartServers(ctx context.Context, cfg *boot_config.Config, errCh chan error) ( 314 []apiserver.Apiserver, error) { 315 // 启动API服务器 316 var servers []apiserver.Apiserver 317 318 // 等待所有ApiServer都监听完成 319 for _, protocol := range cfg.APIServers { 320 slot, exist := apiserver.Slots[protocol.Name] 321 if !exist { 322 log.Warn("[ERROR] apiserver slot not exists", zap.String("name", protocol.Name)) 323 continue 324 } 325 326 err := slot.Initialize(ctx, protocol.Option, protocol.API) 327 if err != nil { 328 fmt.Printf("[ERROR] %v\n", err) 329 return nil, fmt.Errorf("apiserver %s initialize err: %s", protocol.Name, err.Error()) 330 } 331 332 servers = append(servers, slot) 333 go slot.Run(errCh) 334 } 335 return servers, nil 336 } 337 338 // RestartServers 重启server 339 func RestartServers(errCh chan error) error { 340 // 重新加载配置 341 cfg, err := boot_config.Load(ConfigFilePath) 342 if err != nil { 343 log.Infof("restart servers, reload config") 344 return err 345 } 346 log.Infof("new config: %+v", cfg) 347 348 // 把配置的每个apiserver,进行重启 349 for _, protocol := range cfg.APIServers { 350 server, exist := apiserver.Slots[protocol.Name] 351 if !exist { 352 log.Errorf("api server slot %s not exists\n", protocol.Name) 353 return err 354 } 355 log.Infof("begin restarting server: %s", protocol.Name) 356 if err := server.Restart(protocol.Option, protocol.API, errCh); err != nil { 357 return err 358 } 359 } 360 return nil 361 } 362 363 // StopServers 接受外部信号,停止server 364 func StopServers(servers []apiserver.Apiserver) { 365 // stop health checkers 366 if nil != selfHeathChecker { 367 selfHeathChecker.Stop() 368 } 369 // deregister instances 370 SelfDeregister() 371 // 停掉服务 372 for _, s := range servers { 373 log.Infof("stop server protocol: %s", s.GetProtocol()) 374 s.Stop() 375 } 376 } 377 378 // StartBootstrapInOrder 开始进入启动加锁 379 // 原因:Server启动的时候会从数据库拉取大量数据,防止同时启动把DB压死 380 // 还有一种场景,server全部宕机批量重启,导致数据库被压死,导致雪崩 381 func StartBootstrapInOrder(s store.Store, c *boot_config.Config) (store.Transaction, error) { 382 order := c.Bootstrap.StartInOrder 383 log.Infof("[Bootstrap] get bootstrap order config: %+v", order) 384 open, _ := order["open"].(bool) 385 key, _ := order["key"].(string) 386 if !open || key == "" { 387 log.Infof("[Bootstrap] start in order config is not open or key is null") 388 return nil, nil 389 } 390 391 log.Infof("bootstrap start in order with key: %s", key) 392 393 // 启动一个日志协程,当等锁的时候,可以看到server正在等待锁 394 stopCh := make(chan struct{}) 395 defer close(stopCh) // 函数退出的时候,关闭stopCh 396 go func() { 397 ticker := time.NewTicker(time.Second * 10) 398 defer ticker.Stop() 399 for { 400 select { 401 case <-ticker.C: 402 log.Infof("bootstrap waiting the lock") 403 case <-stopCh: 404 return 405 } 406 } 407 }() 408 409 // 重试多次 410 maxTimes := 10 411 for i := 0; i < maxTimes; i++ { 412 tx, err := s.CreateTransaction() 413 if err != nil { 414 log.Errorf("create transaction err: %v", err) 415 return nil, err 416 } 417 // 这里可能会出现锁超时,超时则重试 418 if err := tx.LockBootstrap(key, utils.LocalHost); err != nil { 419 log.Errorf("lock bootstrap err: %s", err.Error()) 420 _ = tx.Commit() 421 continue 422 } 423 // 加锁成功,直接返回 424 log.Infof("lock bootstrap success") 425 return tx, nil 426 } 427 428 return nil, errors.New("lock bootstrap error") 429 } 430 431 // FinishBootstrapOrder 完成 提交锁 432 func FinishBootstrapOrder(tx store.Transaction) error { 433 if tx != nil { 434 return tx.Commit() 435 } 436 437 return nil 438 } 439 440 func genContext() context.Context { 441 ctx := context.Background() 442 reqCtx := context.WithValue(context.Background(), utils.ContextAuthTokenKey, "") 443 ctx = context.WithValue(ctx, utils.StringContext("request-id"), fmt.Sprintf("self-%d", time.Now().Nanosecond())) 444 ctx = context.WithValue(ctx, utils.ContextAuthContextKey, model.NewAcquireContext( 445 model.WithOperation(model.Read), model.WithModule(model.BootstrapModule), model.WithRequestContext(reqCtx))) 446 return ctx 447 } 448 449 // acquireLocalhost 探测获取本机IP地址 450 func acquireLocalhost(ctx context.Context, polarisService *boot_config.PolarisService) (context.Context, error) { 451 if polarisService == nil || !polarisService.EnableRegister { 452 log.Infof("[Bootstrap] polaris service config not found") 453 return ctx, nil 454 } 455 if len(polarisService.SelfAddress) != 0 { 456 utils.LocalHost = polarisService.SelfAddress 457 return utils.WithLocalhost(ctx, polarisService.SelfAddress), nil 458 } 459 if len(polarisService.NetworkInter) != 0 { 460 netInter, err := net.InterfaceByName(polarisService.NetworkInter) 461 if err != nil { 462 log.Errorf("get local host by network_interface: %s err: %s", polarisService.NetworkInter, err.Error()) 463 return nil, err 464 } 465 addrs, err := netInter.Addrs() 466 if err != nil { 467 log.Errorf("get local host by network_interface: %s err: %s", polarisService.NetworkInter, err.Error()) 468 return nil, err 469 } 470 for _, addr := range addrs { 471 if ipnet, ok := addr.(*net.IPNet); ok && !ipnet.IP.IsLoopback() { 472 if ipnet.IP.To4() != nil { 473 utils.LocalHost = ipnet.IP.String() 474 return utils.WithLocalhost(ctx, polarisService.SelfAddress), nil 475 } 476 } 477 } 478 } 479 480 localHost, err := getLocalHost(polarisService.ProbeAddress) 481 if err != nil { 482 log.Errorf("get local host err: %s", err.Error()) 483 return nil, err 484 } 485 log.Infof("[Bootstrap] get local host: %s", localHost) 486 utils.LocalHost = localHost 487 return utils.WithLocalhost(ctx, localHost), nil 488 } 489 490 func acquireLocalPort(ctx context.Context, apientries []apiserver.Config) { 491 for i := range apientries { 492 entry := apientries[i] 493 if entry.Name != "service-grpc" { 494 continue 495 } 496 port, _ := entry.Option["listenPort"].(int) 497 utils.LocalPort = port 498 break 499 } 500 } 501 502 // polarisServiceRegister 自注册主函数 503 func polarisServiceRegister(polarisService *boot_config.PolarisService, apiServers []apiserver.Config) error { 504 if polarisService == nil || !polarisService.EnableRegister { 505 log.Infof("[Bootstrap] not enable register the polaris service") 506 return nil 507 } 508 509 apiServerNames := make(map[string]bool) 510 for _, server := range apiServers { 511 apiServerNames[server.Name] = true 512 } 513 hbInterval := boot_config.DefaultHeartbeatInterval 514 if polarisService.HeartbeatInterval > 0 { 515 hbInterval = polarisService.HeartbeatInterval 516 } 517 // 开始注册每个服务 518 for _, svc := range polarisService.Services { 519 protocols := svc.Protocols 520 // 如果service.Protocols为空,默认采用apiServers的protocols注册,实际为配置中的Name字段, 521 // 如:grpcserver, httpserver, xdsserverv3,也隐式表达了协议的意思 522 if len(protocols) == 0 { 523 for _, server := range apiServers { 524 protocols = append(protocols, server.Name) 525 } 526 } 527 for _, name := range protocols { 528 if _, exist := apiServerNames[name]; !exist { 529 return fmt.Errorf("server(%s) not registered", name) 530 } 531 slot, exist := apiserver.Slots[name] 532 if !exist { 533 return fmt.Errorf("server(%s) not supported", name) 534 } 535 host := utils.LocalHost 536 port := slot.GetPort() 537 protocol := slot.GetProtocol() 538 if err := selfRegister(host, port, protocol, polarisService.Isolated, svc, hbInterval); err != nil { 539 log.Errorf("self register err: %s", err.Error()) 540 return err 541 } 542 543 log.Infof("self register success. host = %s, port = %d, protocol = %s, service = %s", 544 host, port, protocol, svc) 545 } 546 } 547 if len(SelfServiceInstance) > 0 && !polarisService.DisableHeartbeat { 548 log.Infof("start self health checker") 549 var err error 550 if selfHeathChecker, err = NewSelfHeathChecker(SelfServiceInstance, hbInterval); nil != err { 551 log.Errorf("self health checker err: %s", err.Error()) 552 return err 553 } 554 go selfHeathChecker.Start() 555 } 556 return nil 557 } 558 559 // selfRegister 服务自注册 560 func selfRegister( 561 host string, port uint32, protocol string, isolated bool, polarisService *boot_config.Service, hbInterval int) error { 562 server, err := service.GetOriginServer() 563 if err != nil { 564 return err 565 } 566 567 name := boot_config.DefaultPolarisName 568 polarisNamespace := boot_config.DefaultPolarisNamespace 569 if polarisService.Name != "" { 570 name = polarisService.Name 571 } 572 573 if polarisService.Namespace != "" { 574 polarisNamespace = polarisService.Namespace 575 } 576 577 metadata := polarisService.Metadata 578 if len(metadata) == 0 { 579 metadata = make(map[string]string) 580 } 581 metadata[model.MetaKeyBuildRevision] = version.GetRevision() 582 metadata[model.MetaKeyPolarisService] = name 583 584 req := &apiservice.Instance{ 585 Service: utils.NewStringValue(name), 586 Namespace: utils.NewStringValue(polarisNamespace), 587 Host: utils.NewStringValue(host), 588 Port: utils.NewUInt32Value(port), 589 Protocol: utils.NewStringValue(protocol), 590 Version: utils.NewStringValue(version.Get()), 591 EnableHealthCheck: utils.NewBoolValue(true), 592 Isolate: utils.NewBoolValue(isolated), 593 HealthCheck: &apiservice.HealthCheck{ 594 Type: apiservice.HealthCheck_HEARTBEAT, 595 Heartbeat: &apiservice.HeartbeatHealthCheck{ 596 Ttl: &wrappers.UInt32Value{Value: uint32(hbInterval)}, 597 }, 598 }, 599 Metadata: metadata, 600 } 601 602 resp := server.CreateInstance(genContext(), req) 603 if api.CalcCode(resp) != 200 { 604 // 如果self之前注册过,那么可以忽略 605 if resp.GetCode().GetValue() != api.ExistedResource { 606 return fmt.Errorf("%s", resp.GetInfo().GetValue()) 607 } 608 609 resp = server.UpdateInstance(genContext(), req) 610 if api.CalcCode(resp) != 200 { 611 return fmt.Errorf("%s", resp.GetInfo().GetValue()) 612 } 613 } 614 SelfServiceInstance = append(SelfServiceInstance, req) 615 616 return nil 617 } 618 619 // SelfDeregister Server退出的时候,自动反注册 620 func SelfDeregister() { 621 namingServer, err := service.GetOriginServer() 622 if err != nil { 623 log.Errorf("get naming server obj err: %s", err.Error()) 624 return 625 } 626 for _, req := range SelfServiceInstance { 627 log.Infof("Deregister the instance(%+v)", req) 628 if resp := namingServer.DeleteInstance(genContext(), req); api.CalcCode(resp) != 200 { 629 // 遇到失败,继续反注册其他的实例 630 log.Errorf("Deregister instance error: %s", resp.GetInfo().GetValue()) 631 } 632 } 633 // wait the async event handler to finish 634 time.Sleep(5 * time.Second) 635 } 636 637 // getLocalHost 获取本地IP地址 638 func getLocalHost(addr string) (string, error) { 639 if len(addr) == 0 { 640 return "127.0.0.1", nil 641 } 642 conn, err := net.Dial("tcp", addr) 643 if err != nil { 644 return "", err 645 } 646 defer func() { 647 _ = conn.Close() 648 }() 649 650 localAddr := conn.LocalAddr().String() // ip:port 651 segs := strings.Split(localAddr, ":") 652 if len(segs) != 2 { 653 return "", errors.New("get local address format is invalid") 654 } 655 656 return segs[0], nil 657 } 658 659 // getSelfRegisterPolarsServiceKeySet 获取自注册的系统服务集合 660 func getSelfRegisterPolarsServiceKeySet(polarisServiceCfg *boot_config.PolarisService) map[model.ServiceKey]struct{} { 661 if polarisServiceCfg == nil { 662 return nil 663 } 664 polarisServiceSet := make(map[model.ServiceKey]struct{}) 665 for _, svc := range polarisServiceCfg.Services { 666 ns, n := svc.Namespace, svc.Name 667 if ns == "" { 668 ns = boot_config.DefaultPolarisNamespace 669 } 670 polarisServiceSet[model.ServiceKey{Namespace: ns, Name: n}] = struct{}{} 671 } 672 return polarisServiceSet 673 }