github.com/polarismesh/polaris@v1.17.8/service/healthcheck/server.go (about) 1 /** 2 * Tencent is pleased to support the open source community by making Polaris available. 3 * 4 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 5 * 6 * Licensed under the BSD 3-Clause License (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * https://opensource.org/licenses/BSD-3-Clause 11 * 12 * Unless required by applicable law or agreed to in writing, software distributed 13 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 14 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 15 * specific language governing permissions and limitations under the License. 16 */ 17 18 package healthcheck 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "strconv" 25 "sync" 26 "time" 27 28 apimodel "github.com/polarismesh/specification/source/go/api/v1/model" 29 apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage" 30 31 cachetypes "github.com/polarismesh/polaris/cache/api" 32 api "github.com/polarismesh/polaris/common/api/v1" 33 "github.com/polarismesh/polaris/common/eventhub" 34 "github.com/polarismesh/polaris/common/model" 35 commontime "github.com/polarismesh/polaris/common/time" 36 "github.com/polarismesh/polaris/common/utils" 37 "github.com/polarismesh/polaris/plugin" 38 "github.com/polarismesh/polaris/service/batch" 39 "github.com/polarismesh/polaris/store" 40 ) 41 42 var ( 43 server = new(Server) 44 once = sync.Once{} 45 finishInit = false 46 ) 47 48 // Server health checks the main server 49 type Server struct { 50 hcOpt *Config 51 storage store.Store 52 defaultChecker plugin.HealthChecker 53 checkers map[int32]plugin.HealthChecker 54 cacheProvider *CacheProvider 55 timeAdjuster *TimeAdjuster 56 dispatcher *Dispatcher 57 checkScheduler *CheckScheduler 58 history plugin.History 59 discoverEvent plugin.DiscoverChannel 60 localHost string 61 bc *batch.Controller 62 serviceCache cachetypes.ServiceCache 63 instanceCache cachetypes.InstanceCache 64 instanceEventChannel chan *model.InstanceEvent 65 66 subCtxs []*eventhub.SubscribtionContext 67 } 68 69 // Initialize 初始化 70 func Initialize(ctx context.Context, hcOpt *Config, cacheOpen bool, bc *batch.Controller) error { 71 var err error 72 once.Do(func() { 73 err = initialize(ctx, hcOpt, cacheOpen, bc) 74 }) 75 76 if err != nil { 77 return err 78 } 79 80 finishInit = true 81 return nil 82 } 83 84 func initialize(ctx context.Context, hcOpt *Config, cacheOpen bool, bc *batch.Controller) error { 85 server.hcOpt = hcOpt 86 if !cacheOpen { 87 return fmt.Errorf("[healthcheck]cache not open") 88 } 89 hcOpt.SetDefault() 90 if hcOpt.Open { 91 if len(hcOpt.Checkers) > 0 { 92 server.checkers = make(map[int32]plugin.HealthChecker, len(hcOpt.Checkers)) 93 for _, entry := range hcOpt.Checkers { 94 checker := plugin.GetHealthChecker(entry.Name, &entry) 95 if checker == nil { 96 return fmt.Errorf("[healthcheck]unknown healthchecker %s", entry.Name) 97 } 98 // The same health type check plugin can only exist in one 99 _, exist := server.checkers[int32(checker.Type())] 100 if exist { 101 return fmt.Errorf("[healthcheck]duplicate healthchecker %s, checkType %d", entry.Name, checker.Type()) 102 } 103 server.checkers[int32(checker.Type())] = checker 104 if nil == server.defaultChecker { 105 server.defaultChecker = checker 106 } 107 } 108 } else { 109 return fmt.Errorf("[healthcheck]no checker config") 110 } 111 } 112 var err error 113 if server.storage, err = store.GetStore(); err != nil { 114 return err 115 } 116 117 server.bc = bc 118 server.subCtxs = make([]*eventhub.SubscribtionContext, 0, 4) 119 server.localHost = hcOpt.LocalHost 120 server.history = plugin.GetHistory() 121 server.discoverEvent = plugin.GetDiscoverEvent() 122 123 server.cacheProvider = newCacheProvider(hcOpt.Service, server) 124 server.timeAdjuster = newTimeAdjuster(ctx, server.storage) 125 server.checkScheduler = newCheckScheduler(ctx, hcOpt.SlotNum, hcOpt.MinCheckInterval, 126 hcOpt.MaxCheckInterval, hcOpt.ClientCheckInterval, hcOpt.ClientCheckTtl) 127 server.dispatcher = newDispatcher(ctx, server) 128 return server.run(ctx) 129 } 130 131 func (s *Server) run(ctx context.Context) error { 132 if !s.isOpen() { 133 return nil 134 } 135 136 s.checkScheduler.run(ctx) 137 go s.timeAdjuster.doTimeAdjust(ctx) 138 s.dispatcher.startDispatchingJob(ctx) 139 140 s.instanceEventChannel = make(chan *model.InstanceEvent, 1000) 141 go s.handleInstanceEventWorker(ctx) 142 143 leaderChangeEventHandler := newLeaderChangeEventHandler(s.cacheProvider, s.hcOpt.MinCheckInterval) 144 subCtx, err := eventhub.Subscribe(eventhub.LeaderChangeEventTopic, leaderChangeEventHandler) 145 if err != nil { 146 return err 147 } 148 s.subCtxs = append(s.subCtxs, subCtx) 149 150 instanceEventHandler := newInstanceEventHealthCheckHandler(ctx, s.instanceEventChannel) 151 subCtx, err = eventhub.Subscribe(eventhub.InstanceEventTopic, instanceEventHandler) 152 if err != nil { 153 return err 154 } 155 s.subCtxs = append(s.subCtxs, subCtx) 156 157 if err := s.storage.StartLeaderElection(store.ElectionKeySelfServiceChecker); err != nil { 158 return err 159 } 160 return nil 161 } 162 163 // Report heartbeat request 164 func (s *Server) Report(ctx context.Context, req *apiservice.Instance) *apiservice.Response { 165 return s.doReport(ctx, req) 166 } 167 168 // Reports batch report heartbeat request 169 func (s *Server) Reports(ctx context.Context, req []*apiservice.InstanceHeartbeat) *apiservice.Response { 170 return s.doReports(ctx, req) 171 } 172 173 // ReportByClient report heartbeat request by client 174 func (s *Server) ReportByClient(ctx context.Context, req *apiservice.Client) *apiservice.Response { 175 return s.doReportByClient(ctx, req) 176 } 177 178 func (s *Server) Destroy() { 179 for i := range s.subCtxs { 180 s.subCtxs[i].Cancel() 181 } 182 } 183 184 // GetServer 获取已经初始化好的Server 185 func GetServer() (*Server, error) { 186 if !finishInit { 187 return nil, errors.New("server has not done InitializeServer") 188 } 189 190 return server, nil 191 } 192 193 // SetServer for test only 194 func SetServer(srv *Server) { 195 server = srv 196 } 197 198 // SetServiceCache 设置服务缓存 199 func (s *Server) SetServiceCache(serviceCache cachetypes.ServiceCache) { 200 s.serviceCache = serviceCache 201 } 202 203 // SetInstanceCache 设置服务实例缓存 204 func (s *Server) SetInstanceCache(instanceCache cachetypes.InstanceCache) { 205 s.instanceCache = instanceCache 206 } 207 208 // CacheProvider get cache provider 209 func (s *Server) CacheProvider() (*CacheProvider, error) { 210 if !finishInit { 211 return nil, errors.New("cache provider has not done InitializeServer") 212 } 213 return s.cacheProvider, nil 214 } 215 216 // ListCheckerServer get checker server instance list 217 func (s *Server) ListCheckerServer() []*model.Instance { 218 ret := make([]*model.Instance, 0, s.cacheProvider.selfServiceInstances.Count()) 219 s.cacheProvider.selfServiceInstances.Range(func(instanceId string, value ItemWithChecker) { 220 ret = append(ret, value.GetInstance()) 221 }) 222 return ret 223 } 224 225 // RecordHistory server对外提供history插件的简单封装 226 func (s *Server) RecordHistory(entry *model.RecordEntry) { 227 // 如果插件没有初始化,那么不记录history 228 if s.history == nil { 229 return 230 } 231 // 如果数据为空,则不需要打印了 232 if entry == nil { 233 return 234 } 235 236 // 调用插件记录history 237 s.history.Record(entry) 238 } 239 240 // publishInstanceEvent 发布服务事件 241 func (s *Server) publishInstanceEvent(serviceID string, event model.InstanceEvent) { 242 event.SvcId = serviceID 243 if event.Instance != nil { 244 // event.Instance = proto.Clone(event.Instance).(*apiservice.Instance) 245 } 246 _ = eventhub.Publish(eventhub.InstanceEventTopic, event) 247 } 248 249 // GetLastHeartbeat 获取上一次心跳的时间 250 func (s *Server) GetLastHeartbeat(req *apiservice.Instance) *apiservice.Response { 251 if len(s.checkers) == 0 { 252 return api.NewResponse(apimodel.Code_HealthCheckNotOpen) 253 } 254 id, errRsp := checkHeartbeatInstance(req) 255 if errRsp != nil { 256 return errRsp 257 } 258 req.Id = utils.NewStringValue(id) 259 insCache := s.cacheProvider.GetInstance(id) 260 if insCache == nil { 261 return api.NewInstanceResponse(apimodel.Code_NotFoundResource, req) 262 } 263 checker, ok := s.checkers[int32(insCache.HealthCheck().GetType())] 264 if !ok { 265 return api.NewInstanceResponse(apimodel.Code_HeartbeatTypeNotFound, req) 266 } 267 queryResp, err := checker.Query(context.Background(), &plugin.QueryRequest{ 268 InstanceId: insCache.ID(), 269 Host: insCache.Host(), 270 Port: insCache.Port(), 271 }) 272 if err != nil { 273 return api.NewInstanceRespWithError(apimodel.Code_ExecuteException, err, req) 274 } 275 req.Service = insCache.Proto.GetService() 276 req.Namespace = insCache.Proto.GetNamespace() 277 req.Host = insCache.Proto.GetHost() 278 req.Port = insCache.Proto.Port 279 req.VpcId = insCache.Proto.GetVpcId() 280 req.HealthCheck = insCache.Proto.GetHealthCheck() 281 req.Metadata = make(map[string]string, 3) 282 req.Metadata["last-heartbeat-timestamp"] = strconv.Itoa(int(queryResp.LastHeartbeatSec)) 283 req.Metadata["last-heartbeat-time"] = commontime.Time2String(time.Unix(queryResp.LastHeartbeatSec, 0)) 284 req.Metadata["system-time"] = commontime.Time2String(time.Unix(currentTimeSec(), 0)) 285 return api.NewInstanceResponse(apimodel.Code_ExecuteSuccess, req) 286 } 287 288 func (s *Server) handleInstanceEventWorker(ctx context.Context) { 289 for { 290 select { 291 case event := <-s.instanceEventChannel: 292 switch event.EType { 293 case model.EventInstanceOffline: 294 insCache := s.cacheProvider.GetInstance(event.Id) 295 if insCache == nil { 296 log.Errorf("[Health Check] cannot get instance from cache, instance id is %s", event.Id) 297 break 298 } 299 checker, ok := s.checkers[int32(insCache.HealthCheck().GetType())] 300 if !ok { 301 log.Errorf("[Health Check]heart beat type not found checkType %d", 302 int32(insCache.HealthCheck().GetType())) 303 break 304 } 305 log.Infof("[Health Check]delete instance heart beat information, id is %s", event.Id) 306 err := checker.Delete(context.Background(), event.Id) 307 if err != nil { 308 log.Errorf("[Health Check]addr is %s:%d, id is %s, delete err is %s", 309 insCache.Host(), insCache.Port(), insCache.ID(), err) 310 } 311 } 312 case <-ctx.Done(): 313 log.Infof("[Health Check]instance event handler loop stopped") 314 return 315 } 316 } 317 } 318 319 // Checkers get all health checker, for test only 320 func (s *Server) Checkers() map[int32]plugin.HealthChecker { 321 return s.checkers 322 } 323 324 func (s *Server) isOpen() bool { 325 return s.hcOpt.Open 326 } 327 328 func currentTimeSec() int64 { 329 return time.Now().Unix() - server.timeAdjuster.GetDiff() 330 }