github.com/mdaxf/iac@v0.0.0-20240519030858-58a061660378/framework/heartbeat/heartbeat.go (about)

     1  package heartbeat
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/mdaxf/iac/com"
    10  	"github.com/mdaxf/iac/logger"
    11  )
    12  
    13  var (
    14  	ErrServiceUnavailable = errors.New("service is unavailable")
    15  )
    16  
    17  // HeartbeatManager manages heartbeat checks for various services.
    18  type HeartbeatManager struct {
    19  	checkers map[string]com.HeartbeatChecker
    20  	mu       sync.RWMutex
    21  }
    22  
    23  // NewHeartbeatManager creates a new HeartbeatManager instance.
    24  func NewHeartbeatManager() *HeartbeatManager {
    25  	return &HeartbeatManager{
    26  		checkers: make(map[string]com.HeartbeatChecker),
    27  	}
    28  }
    29  
    30  // RegisterChecker registers a new HeartbeatChecker with a unique name.
    31  func (m *HeartbeatManager) RegisterChecker(name string, checker com.HeartbeatChecker) {
    32  	ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "HeartbeatManager"}
    33  
    34  	startTime := time.Now()
    35  	defer func() {
    36  		elapsed := time.Since(startTime)
    37  		ilog.PerformanceWithDuration("RegisterChecker", elapsed)
    38  	}()
    39  	defer func() {
    40  		if err := recover(); err != nil {
    41  			ilog.Error(fmt.Sprintf("There is error to RegisterChecker %s with error: %s", name, err))
    42  
    43  			return
    44  		}
    45  	}()
    46  
    47  	ilog.Debug(fmt.Sprintf("RegisterChecker %s", name))
    48  
    49  	m.mu.Lock()
    50  	defer m.mu.Unlock()
    51  	m.checkers[name] = checker
    52  }
    53  
    54  // StartHeartbeatChecks starts the heartbeat checks for all registered services.
    55  func (m *HeartbeatManager) StartHeartbeatChecks(interval time.Duration) {
    56  	ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "StartHeartbeatChecks"}
    57  
    58  	startTime := time.Now()
    59  	defer func() {
    60  		elapsed := time.Since(startTime)
    61  		ilog.PerformanceWithDuration("StartHeartbeatChecks", elapsed)
    62  	}()
    63  	defer func() {
    64  		if err := recover(); err != nil {
    65  			ilog.Error(fmt.Sprintf("There is error to StartHeartbeatChecks with error: %s", err))
    66  
    67  			return
    68  		}
    69  	}()
    70  
    71  	for name, checker := range m.checkers {
    72  		go m.startHeartbeatCheck(name, checker, interval)
    73  	}
    74  }
    75  
    76  func (m *HeartbeatManager) startHeartbeatCheck(name string, checker com.HeartbeatChecker, interval time.Duration) {
    77  	ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "startHeartbeatCheck"}
    78  
    79  	startTime := time.Now()
    80  	defer func() {
    81  		elapsed := time.Since(startTime)
    82  		ilog.PerformanceWithDuration("startHeartbeatCheck", elapsed)
    83  	}()
    84  	defer func() {
    85  		if err := recover(); err != nil {
    86  			ilog.Error(fmt.Sprintf("There is error to startHeartbeatCheck %s with error: %s", name, err))
    87  
    88  			return
    89  		}
    90  	}()
    91  
    92  	ticker := time.NewTicker(interval)
    93  	defer ticker.Stop()
    94  
    95  	for {
    96  		select {
    97  		case <-ticker.C:
    98  			err := checker.Ping()
    99  			if err != nil {
   100  				m.handleServiceUnavailable(name, checker)
   101  			}
   102  		}
   103  	}
   104  }
   105  
   106  func (m *HeartbeatManager) handleServiceUnavailable(name string, checker com.HeartbeatChecker) {
   107  	ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "handleServiceUnavailable"}
   108  
   109  	startTime := time.Now()
   110  	defer func() {
   111  		elapsed := time.Since(startTime)
   112  		ilog.PerformanceWithDuration("handleServiceUnavailable", elapsed)
   113  	}()
   114  
   115  	defer func() {
   116  		if err := recover(); err != nil {
   117  			ilog.Error(fmt.Sprintf("There is error to handleServiceUnavailable %s with error: %s", name, err))
   118  
   119  			return
   120  		}
   121  	}()
   122  
   123  	m.mu.RLock()
   124  	defer m.mu.RUnlock()
   125  
   126  	ilog.Error(fmt.Sprintf("Service %s is unavailable", name))
   127  
   128  	err := checker.Disconnect()
   129  	if err != nil {
   130  		ilog.Error(fmt.Sprintf("There is error to handleServiceUnavailable %s with error: %s", name, err))
   131  	}
   132  
   133  	tries := 0
   134  	maxTries := 3
   135  
   136  	for {
   137  		err := checker.ReConnect()
   138  		if err == nil {
   139  			// Service reconnected
   140  			break
   141  		}
   142  		// Handle connection error and retry
   143  		ilog.Error(fmt.Sprintf("There is error to handleServiceUnavailable %s with error: %s", name, err))
   144  
   145  		tries++
   146  		if tries >= maxTries {
   147  			ilog.Error(fmt.Sprintf("Service %s is still unavailable after %d tries", name, tries))
   148  			return
   149  		}
   150  		time.Sleep(5 * time.Second)
   151  	}
   152  }