github.com/aergoio/aergo@v1.3.1/polaris/server/healthcheck.go (about)

     1  /*
     2   * @file
     3   * @copyright defined in aergo/LICENSE.txt
     4   */
     5  
     6  package server
     7  
     8  import (
     9  	"github.com/aergoio/aergo/polaris/common"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/aergoio/aergo-lib/log"
    14  	"github.com/aergoio/aergo/p2p/p2pcommon"
    15  )
    16  
    17  type HealthCheckManager interface {
    18  	Start()
    19  	Stop()
    20  }
    21  
    22  type healthCheckManager struct {
    23  	logger *log.Logger
    24  	ms     mapService
    25  	nt     p2pcommon.NetworkTransport
    26  	finish chan interface{}
    27  
    28  	workerCnt int
    29  }
    30  
    31  var _ HealthCheckManager = (*healthCheckManager)(nil)
    32  
    33  func (hcm *healthCheckManager) Start() {
    34  	go hcm.runHCM()
    35  }
    36  
    37  func (hcm *healthCheckManager) Stop() {
    38  	hcm.finish <- struct{}{}
    39  }
    40  
    41  func NewHCM(mapService *PeerMapService, nt p2pcommon.NetworkTransport) *healthCheckManager {
    42  	hcm := &healthCheckManager{ms: mapService, nt: nt, logger: mapService.Logger, workerCnt: ConcurrentHealthCheckCount,
    43  		finish: make(chan interface{}, 1)}
    44  
    45  	return hcm
    46  }
    47  
    48  func (hcm *healthCheckManager) runHCM() {
    49  	ticker := time.NewTicker(time.Minute)
    50  	for {
    51  		select {
    52  		case <-ticker.C:
    53  			hcm.checkPeers()
    54  
    55  		case <-hcm.finish:
    56  			break
    57  		}
    58  	}
    59  
    60  	hcm.logger.Info().Msg("Healthchecker manager finished")
    61  }
    62  
    63  func (hcm *healthCheckManager) checkPeers() {
    64  	checkers := hcm.ms.getPeerCheckers()
    65  	thresholdTime := time.Now().Add(PeerHealthcheckInterval)
    66  	toCheck := make([]peerChecker, 0, len(checkers)>>2)
    67  	for _, ps := range checkers {
    68  		if ps.lastCheck().Before(thresholdTime) {
    69  			toCheck = append(toCheck, ps)
    70  		}
    71  	}
    72  
    73  	hcm.logger.Debug().Int("all_peers", len(checkers)).Int("check_peers", len(toCheck)).Msg("Starting peers health check")
    74  	wg := &sync.WaitGroup{}
    75  	wg.Add(len(toCheck))
    76  	for _, ps := range toCheck {
    77  		// TODO make a pool and limit count of concurrent pings
    78  		go ps.check(wg, common.PolarisConnectionTTL)
    79  	}
    80  	wg.Wait()
    81  	hcm.logger.Debug().Msg("Finished checks")
    82  }