github.com/mdaxf/iac@v0.0.0-20240519030858-58a061660378/framework/heartbeat/heartbeat.go (about) 1 package heartbeat 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 "time" 8 9 "github.com/mdaxf/iac/com" 10 "github.com/mdaxf/iac/logger" 11 ) 12 13 var ( 14 ErrServiceUnavailable = errors.New("service is unavailable") 15 ) 16 17 // HeartbeatManager manages heartbeat checks for various services. 18 type HeartbeatManager struct { 19 checkers map[string]com.HeartbeatChecker 20 mu sync.RWMutex 21 } 22 23 // NewHeartbeatManager creates a new HeartbeatManager instance. 24 func NewHeartbeatManager() *HeartbeatManager { 25 return &HeartbeatManager{ 26 checkers: make(map[string]com.HeartbeatChecker), 27 } 28 } 29 30 // RegisterChecker registers a new HeartbeatChecker with a unique name. 31 func (m *HeartbeatManager) RegisterChecker(name string, checker com.HeartbeatChecker) { 32 ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "HeartbeatManager"} 33 34 startTime := time.Now() 35 defer func() { 36 elapsed := time.Since(startTime) 37 ilog.PerformanceWithDuration("RegisterChecker", elapsed) 38 }() 39 defer func() { 40 if err := recover(); err != nil { 41 ilog.Error(fmt.Sprintf("There is error to RegisterChecker %s with error: %s", name, err)) 42 43 return 44 } 45 }() 46 47 ilog.Debug(fmt.Sprintf("RegisterChecker %s", name)) 48 49 m.mu.Lock() 50 defer m.mu.Unlock() 51 m.checkers[name] = checker 52 } 53 54 // StartHeartbeatChecks starts the heartbeat checks for all registered services. 55 func (m *HeartbeatManager) StartHeartbeatChecks(interval time.Duration) { 56 ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "StartHeartbeatChecks"} 57 58 startTime := time.Now() 59 defer func() { 60 elapsed := time.Since(startTime) 61 ilog.PerformanceWithDuration("StartHeartbeatChecks", elapsed) 62 }() 63 defer func() { 64 if err := recover(); err != nil { 65 ilog.Error(fmt.Sprintf("There is error to StartHeartbeatChecks with error: %s", err)) 66 67 return 68 } 69 }() 70 71 for name, checker := range m.checkers { 72 go m.startHeartbeatCheck(name, checker, interval) 73 } 74 } 75 76 func (m *HeartbeatManager) startHeartbeatCheck(name string, checker com.HeartbeatChecker, interval time.Duration) { 77 ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "startHeartbeatCheck"} 78 79 startTime := time.Now() 80 defer func() { 81 elapsed := time.Since(startTime) 82 ilog.PerformanceWithDuration("startHeartbeatCheck", elapsed) 83 }() 84 defer func() { 85 if err := recover(); err != nil { 86 ilog.Error(fmt.Sprintf("There is error to startHeartbeatCheck %s with error: %s", name, err)) 87 88 return 89 } 90 }() 91 92 ticker := time.NewTicker(interval) 93 defer ticker.Stop() 94 95 for { 96 select { 97 case <-ticker.C: 98 err := checker.Ping() 99 if err != nil { 100 m.handleServiceUnavailable(name, checker) 101 } 102 } 103 } 104 } 105 106 func (m *HeartbeatManager) handleServiceUnavailable(name string, checker com.HeartbeatChecker) { 107 ilog := logger.Log{ModuleName: logger.Framework, User: "System", ControllerName: "handleServiceUnavailable"} 108 109 startTime := time.Now() 110 defer func() { 111 elapsed := time.Since(startTime) 112 ilog.PerformanceWithDuration("handleServiceUnavailable", elapsed) 113 }() 114 115 defer func() { 116 if err := recover(); err != nil { 117 ilog.Error(fmt.Sprintf("There is error to handleServiceUnavailable %s with error: %s", name, err)) 118 119 return 120 } 121 }() 122 123 m.mu.RLock() 124 defer m.mu.RUnlock() 125 126 ilog.Error(fmt.Sprintf("Service %s is unavailable", name)) 127 128 err := checker.Disconnect() 129 if err != nil { 130 ilog.Error(fmt.Sprintf("There is error to handleServiceUnavailable %s with error: %s", name, err)) 131 } 132 133 tries := 0 134 maxTries := 3 135 136 for { 137 err := checker.ReConnect() 138 if err == nil { 139 // Service reconnected 140 break 141 } 142 // Handle connection error and retry 143 ilog.Error(fmt.Sprintf("There is error to handleServiceUnavailable %s with error: %s", name, err)) 144 145 tries++ 146 if tries >= maxTries { 147 ilog.Error(fmt.Sprintf("Service %s is still unavailable after %d tries", name, tries)) 148 return 149 } 150 time.Sleep(5 * time.Second) 151 } 152 }