github.com/polarismesh/polaris@v1.17.8/service/healthcheck/server.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package healthcheck
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"strconv"
    25  	"sync"
    26  	"time"
    27  
    28  	apimodel "github.com/polarismesh/specification/source/go/api/v1/model"
    29  	apiservice "github.com/polarismesh/specification/source/go/api/v1/service_manage"
    30  
    31  	cachetypes "github.com/polarismesh/polaris/cache/api"
    32  	api "github.com/polarismesh/polaris/common/api/v1"
    33  	"github.com/polarismesh/polaris/common/eventhub"
    34  	"github.com/polarismesh/polaris/common/model"
    35  	commontime "github.com/polarismesh/polaris/common/time"
    36  	"github.com/polarismesh/polaris/common/utils"
    37  	"github.com/polarismesh/polaris/plugin"
    38  	"github.com/polarismesh/polaris/service/batch"
    39  	"github.com/polarismesh/polaris/store"
    40  )
    41  
    42  var (
    43  	server     = new(Server)
    44  	once       = sync.Once{}
    45  	finishInit = false
    46  )
    47  
// Server is the health-check server. A single package-level instance is
// populated by initialize and handed out through GetServer.
//
// Field overview (as used in this file):
//   - hcOpt: the health-check configuration passed to Initialize.
//   - storage: the store obtained from store.GetStore.
//   - defaultChecker: the first checker created from hcOpt.Checkers.
//   - checkers: checker plugins keyed by int32(checker.Type()).
//   - cacheProvider, timeAdjuster, dispatcher, checkScheduler: helpers built
//     in initialize; the latter three are started by run.
//   - history, discoverEvent: plugins fetched via plugin.GetHistory and
//     plugin.GetDiscoverEvent.
//   - localHost: copied from hcOpt.LocalHost.
//   - bc: the batch controller passed to Initialize.
//   - serviceCache, instanceCache: injected through SetServiceCache and
//     SetInstanceCache.
//   - instanceEventChannel: buffered channel (capacity 1000) drained by
//     handleInstanceEventWorker.
//   - subCtxs: eventhub subscriptions created in run and cancelled by Destroy.
type Server struct {
	hcOpt                *Config
	storage              store.Store
	defaultChecker       plugin.HealthChecker
	checkers             map[int32]plugin.HealthChecker
	cacheProvider        *CacheProvider
	timeAdjuster         *TimeAdjuster
	dispatcher           *Dispatcher
	checkScheduler       *CheckScheduler
	history              plugin.History
	discoverEvent        plugin.DiscoverChannel
	localHost            string
	bc                   *batch.Controller
	serviceCache         cachetypes.ServiceCache
	instanceCache        cachetypes.InstanceCache
	instanceEventChannel chan *model.InstanceEvent

	subCtxs []*eventhub.SubscribtionContext
}
    68  
    69  // Initialize 初始化
    70  func Initialize(ctx context.Context, hcOpt *Config, cacheOpen bool, bc *batch.Controller) error {
    71  	var err error
    72  	once.Do(func() {
    73  		err = initialize(ctx, hcOpt, cacheOpen, bc)
    74  	})
    75  
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	finishInit = true
    81  	return nil
    82  }
    83  
    84  func initialize(ctx context.Context, hcOpt *Config, cacheOpen bool, bc *batch.Controller) error {
    85  	server.hcOpt = hcOpt
    86  	if !cacheOpen {
    87  		return fmt.Errorf("[healthcheck]cache not open")
    88  	}
    89  	hcOpt.SetDefault()
    90  	if hcOpt.Open {
    91  		if len(hcOpt.Checkers) > 0 {
    92  			server.checkers = make(map[int32]plugin.HealthChecker, len(hcOpt.Checkers))
    93  			for _, entry := range hcOpt.Checkers {
    94  				checker := plugin.GetHealthChecker(entry.Name, &entry)
    95  				if checker == nil {
    96  					return fmt.Errorf("[healthcheck]unknown healthchecker %s", entry.Name)
    97  				}
    98  				// The same health type check plugin can only exist in one
    99  				_, exist := server.checkers[int32(checker.Type())]
   100  				if exist {
   101  					return fmt.Errorf("[healthcheck]duplicate healthchecker %s, checkType %d", entry.Name, checker.Type())
   102  				}
   103  				server.checkers[int32(checker.Type())] = checker
   104  				if nil == server.defaultChecker {
   105  					server.defaultChecker = checker
   106  				}
   107  			}
   108  		} else {
   109  			return fmt.Errorf("[healthcheck]no checker config")
   110  		}
   111  	}
   112  	var err error
   113  	if server.storage, err = store.GetStore(); err != nil {
   114  		return err
   115  	}
   116  
   117  	server.bc = bc
   118  	server.subCtxs = make([]*eventhub.SubscribtionContext, 0, 4)
   119  	server.localHost = hcOpt.LocalHost
   120  	server.history = plugin.GetHistory()
   121  	server.discoverEvent = plugin.GetDiscoverEvent()
   122  
   123  	server.cacheProvider = newCacheProvider(hcOpt.Service, server)
   124  	server.timeAdjuster = newTimeAdjuster(ctx, server.storage)
   125  	server.checkScheduler = newCheckScheduler(ctx, hcOpt.SlotNum, hcOpt.MinCheckInterval,
   126  		hcOpt.MaxCheckInterval, hcOpt.ClientCheckInterval, hcOpt.ClientCheckTtl)
   127  	server.dispatcher = newDispatcher(ctx, server)
   128  	return server.run(ctx)
   129  }
   130  
   131  func (s *Server) run(ctx context.Context) error {
   132  	if !s.isOpen() {
   133  		return nil
   134  	}
   135  
   136  	s.checkScheduler.run(ctx)
   137  	go s.timeAdjuster.doTimeAdjust(ctx)
   138  	s.dispatcher.startDispatchingJob(ctx)
   139  
   140  	s.instanceEventChannel = make(chan *model.InstanceEvent, 1000)
   141  	go s.handleInstanceEventWorker(ctx)
   142  
   143  	leaderChangeEventHandler := newLeaderChangeEventHandler(s.cacheProvider, s.hcOpt.MinCheckInterval)
   144  	subCtx, err := eventhub.Subscribe(eventhub.LeaderChangeEventTopic, leaderChangeEventHandler)
   145  	if err != nil {
   146  		return err
   147  	}
   148  	s.subCtxs = append(s.subCtxs, subCtx)
   149  
   150  	instanceEventHandler := newInstanceEventHealthCheckHandler(ctx, s.instanceEventChannel)
   151  	subCtx, err = eventhub.Subscribe(eventhub.InstanceEventTopic, instanceEventHandler)
   152  	if err != nil {
   153  		return err
   154  	}
   155  	s.subCtxs = append(s.subCtxs, subCtx)
   156  
   157  	if err := s.storage.StartLeaderElection(store.ElectionKeySelfServiceChecker); err != nil {
   158  		return err
   159  	}
   160  	return nil
   161  }
   162  
   163  // Report heartbeat request
   164  func (s *Server) Report(ctx context.Context, req *apiservice.Instance) *apiservice.Response {
   165  	return s.doReport(ctx, req)
   166  }
   167  
   168  // Reports batch report heartbeat request
   169  func (s *Server) Reports(ctx context.Context, req []*apiservice.InstanceHeartbeat) *apiservice.Response {
   170  	return s.doReports(ctx, req)
   171  }
   172  
   173  // ReportByClient report heartbeat request by client
   174  func (s *Server) ReportByClient(ctx context.Context, req *apiservice.Client) *apiservice.Response {
   175  	return s.doReportByClient(ctx, req)
   176  }
   177  
   178  func (s *Server) Destroy() {
   179  	for i := range s.subCtxs {
   180  		s.subCtxs[i].Cancel()
   181  	}
   182  }
   183  
   184  // GetServer 获取已经初始化好的Server
   185  func GetServer() (*Server, error) {
   186  	if !finishInit {
   187  		return nil, errors.New("server has not done InitializeServer")
   188  	}
   189  
   190  	return server, nil
   191  }
   192  
// SetServer replaces the package-level Server instance; for tests only.
// It does not touch finishInit, so GetServer keeps failing until Initialize
// has completed.
func SetServer(srv *Server) {
	server = srv
}
   197  
// SetServiceCache sets the service cache used by the server.
func (s *Server) SetServiceCache(serviceCache cachetypes.ServiceCache) {
	s.serviceCache = serviceCache
}
   202  
// SetInstanceCache sets the service-instance cache used by the server.
func (s *Server) SetInstanceCache(instanceCache cachetypes.InstanceCache) {
	s.instanceCache = instanceCache
}
   207  
   208  // CacheProvider get cache provider
   209  func (s *Server) CacheProvider() (*CacheProvider, error) {
   210  	if !finishInit {
   211  		return nil, errors.New("cache provider has not done InitializeServer")
   212  	}
   213  	return s.cacheProvider, nil
   214  }
   215  
   216  // ListCheckerServer get checker server instance list
   217  func (s *Server) ListCheckerServer() []*model.Instance {
   218  	ret := make([]*model.Instance, 0, s.cacheProvider.selfServiceInstances.Count())
   219  	s.cacheProvider.selfServiceInstances.Range(func(instanceId string, value ItemWithChecker) {
   220  		ret = append(ret, value.GetInstance())
   221  	})
   222  	return ret
   223  }
   224  
   225  // RecordHistory server对外提供history插件的简单封装
   226  func (s *Server) RecordHistory(entry *model.RecordEntry) {
   227  	// 如果插件没有初始化,那么不记录history
   228  	if s.history == nil {
   229  		return
   230  	}
   231  	// 如果数据为空,则不需要打印了
   232  	if entry == nil {
   233  		return
   234  	}
   235  
   236  	// 调用插件记录history
   237  	s.history.Record(entry)
   238  }
   239  
   240  // publishInstanceEvent 发布服务事件
   241  func (s *Server) publishInstanceEvent(serviceID string, event model.InstanceEvent) {
   242  	event.SvcId = serviceID
   243  	if event.Instance != nil {
   244  		// event.Instance = proto.Clone(event.Instance).(*apiservice.Instance)
   245  	}
   246  	_ = eventhub.Publish(eventhub.InstanceEventTopic, event)
   247  }
   248  
   249  // GetLastHeartbeat 获取上一次心跳的时间
   250  func (s *Server) GetLastHeartbeat(req *apiservice.Instance) *apiservice.Response {
   251  	if len(s.checkers) == 0 {
   252  		return api.NewResponse(apimodel.Code_HealthCheckNotOpen)
   253  	}
   254  	id, errRsp := checkHeartbeatInstance(req)
   255  	if errRsp != nil {
   256  		return errRsp
   257  	}
   258  	req.Id = utils.NewStringValue(id)
   259  	insCache := s.cacheProvider.GetInstance(id)
   260  	if insCache == nil {
   261  		return api.NewInstanceResponse(apimodel.Code_NotFoundResource, req)
   262  	}
   263  	checker, ok := s.checkers[int32(insCache.HealthCheck().GetType())]
   264  	if !ok {
   265  		return api.NewInstanceResponse(apimodel.Code_HeartbeatTypeNotFound, req)
   266  	}
   267  	queryResp, err := checker.Query(context.Background(), &plugin.QueryRequest{
   268  		InstanceId: insCache.ID(),
   269  		Host:       insCache.Host(),
   270  		Port:       insCache.Port(),
   271  	})
   272  	if err != nil {
   273  		return api.NewInstanceRespWithError(apimodel.Code_ExecuteException, err, req)
   274  	}
   275  	req.Service = insCache.Proto.GetService()
   276  	req.Namespace = insCache.Proto.GetNamespace()
   277  	req.Host = insCache.Proto.GetHost()
   278  	req.Port = insCache.Proto.Port
   279  	req.VpcId = insCache.Proto.GetVpcId()
   280  	req.HealthCheck = insCache.Proto.GetHealthCheck()
   281  	req.Metadata = make(map[string]string, 3)
   282  	req.Metadata["last-heartbeat-timestamp"] = strconv.Itoa(int(queryResp.LastHeartbeatSec))
   283  	req.Metadata["last-heartbeat-time"] = commontime.Time2String(time.Unix(queryResp.LastHeartbeatSec, 0))
   284  	req.Metadata["system-time"] = commontime.Time2String(time.Unix(currentTimeSec(), 0))
   285  	return api.NewInstanceResponse(apimodel.Code_ExecuteSuccess, req)
   286  }
   287  
   288  func (s *Server) handleInstanceEventWorker(ctx context.Context) {
   289  	for {
   290  		select {
   291  		case event := <-s.instanceEventChannel:
   292  			switch event.EType {
   293  			case model.EventInstanceOffline:
   294  				insCache := s.cacheProvider.GetInstance(event.Id)
   295  				if insCache == nil {
   296  					log.Errorf("[Health Check] cannot get instance from cache, instance id is %s", event.Id)
   297  					break
   298  				}
   299  				checker, ok := s.checkers[int32(insCache.HealthCheck().GetType())]
   300  				if !ok {
   301  					log.Errorf("[Health Check]heart beat type not found checkType %d",
   302  						int32(insCache.HealthCheck().GetType()))
   303  					break
   304  				}
   305  				log.Infof("[Health Check]delete instance heart beat information, id is %s", event.Id)
   306  				err := checker.Delete(context.Background(), event.Id)
   307  				if err != nil {
   308  					log.Errorf("[Health Check]addr is %s:%d, id is %s, delete err is %s",
   309  						insCache.Host(), insCache.Port(), insCache.ID(), err)
   310  				}
   311  			}
   312  		case <-ctx.Done():
   313  			log.Infof("[Health Check]instance event handler loop stopped")
   314  			return
   315  		}
   316  	}
   317  }
   318  
   319  // Checkers get all health checker, for test only
   320  func (s *Server) Checkers() map[int32]plugin.HealthChecker {
   321  	return s.checkers
   322  }
   323  
   324  func (s *Server) isOpen() bool {
   325  	return s.hcOpt.Open
   326  }
   327  
   328  func currentTimeSec() int64 {
   329  	return time.Now().Unix() - server.timeAdjuster.GetDiff()
   330  }