github.com/splucs/witchcraft-go-server@v1.7.0/status/health/periodic/source.go (about) 1 // Copyright (c) 2018 Palantir Technologies. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package periodic 16 17 import ( 18 "context" 19 "fmt" 20 "sync" 21 "time" 22 23 "github.com/palantir/witchcraft-go-logging/wlog/wapp" 24 "github.com/palantir/witchcraft-go-server/conjure/witchcraft/api/health" 25 "github.com/palantir/witchcraft-go-server/status" 26 ) 27 28 type CheckFunc func(ctx context.Context) *health.HealthCheckResult 29 30 type Source struct { 31 Checks map[health.CheckType]CheckFunc 32 } 33 34 type checkState struct { 35 lastResult *health.HealthCheckResult 36 lastResultTime time.Time 37 lastSuccess *health.HealthCheckResult 38 lastSuccessTime time.Time 39 } 40 41 type healthCheckSource struct { 42 // static 43 source Source 44 gracePeriod time.Duration 45 retryInterval time.Duration 46 initialPoll bool 47 48 // mutable 49 mutex sync.RWMutex 50 checkStates map[health.CheckType]*checkState 51 } 52 53 // NewHealthCheckSource creates a health check source that calls poll every retryInterval in a goroutine. The goroutine 54 // is cancelled if ctx is cancelled. If gracePeriod elapses without poll returning nil, the returned health check 55 // source will give a health status of error. checkType is the key to be used in the health result returned by the 56 // health check source. 57 func NewHealthCheckSource(ctx context.Context, gracePeriod time.Duration, retryInterval time.Duration, checkType health.CheckType, poll func() error, options ...Option) status.HealthCheckSource { 58 return FromHealthCheckSource(ctx, gracePeriod, retryInterval, newDefaultHealthCheckSource(checkType, poll), options...) 59 } 60 61 // FromHealthCheckSource creates a health check source that calls the the provided Source.Checks functions every 62 // retryInterval in a goroutine. The goroutine is cancelled if ctx is cancelled. For each check, if gracePeriod elapses 63 // without CheckFunc returning HEALTHY, the returned health check source's HealthStatus will return a HealthCheckResult 64 // of error. 65 func FromHealthCheckSource(ctx context.Context, gracePeriod time.Duration, retryInterval time.Duration, source Source, options ...Option) status.HealthCheckSource { 66 checker := &healthCheckSource{ 67 source: source, 68 gracePeriod: gracePeriod, 69 retryInterval: retryInterval, 70 checkStates: map[health.CheckType]*checkState{}, 71 } 72 for _, option := range options { 73 option.apply(checker) 74 } 75 go wapp.RunWithRecoveryLogging(ctx, checker.runPoll) 76 return checker 77 } 78 79 func (h *healthCheckSource) HealthStatus(ctx context.Context) health.HealthStatus { 80 h.mutex.RLock() 81 defer h.mutex.RUnlock() 82 83 results := make([]health.HealthCheckResult, 0, len(h.source.Checks)) 84 for checkType := range h.source.Checks { 85 checkState, ok := h.checkStates[checkType] 86 if !ok { 87 results = append(results, health.HealthCheckResult{ 88 Type: checkType, 89 State: health.HealthStateRepairing, 90 Message: stringPtr("Check has not yet run"), 91 }) 92 continue 93 } 94 var result health.HealthCheckResult 95 switch { 96 case time.Since(checkState.lastSuccessTime) <= h.gracePeriod: 97 result = *checkState.lastSuccess 98 case time.Since(checkState.lastResultTime) <= h.gracePeriod: 99 result = *checkState.lastResult 100 result.Message = stringPtr(wrap(result.Message, fmt.Sprintf("No successful checks during %s grace period", h.gracePeriod.String()))) 101 default: 102 result = *checkState.lastResult 103 result.Message = stringPtr(wrap(result.Message, fmt.Sprintf("No completed checks during %s grace period", h.gracePeriod.String()))) 104 // Mark REPAIRING if we were healthy before expiration. 105 if result.State == health.HealthStateHealthy { 106 result.State = health.HealthStateRepairing 107 } 108 } 109 results = append(results, result) 110 } 111 112 return toHealthStatus(results) 113 } 114 115 func (h *healthCheckSource) runPoll(ctx context.Context) { 116 ticker := time.NewTicker(h.retryInterval) 117 defer ticker.Stop() 118 if h.initialPoll { 119 h.doPoll(ctx) 120 } 121 for { 122 select { 123 case <-ctx.Done(): 124 return 125 case <-ticker.C: 126 // ensure that doPoll is not called if context is cancelled (without this, if ctx.Done() and ticker.C fire 127 // at the same time and the ticker.C case is selected at the top-level, doPoll may be called even though the 128 // context is done). 129 select { 130 case <-ctx.Done(): 131 return 132 default: 133 } 134 h.doPoll(ctx) 135 } 136 } 137 } 138 139 func (h *healthCheckSource) doPoll(ctx context.Context) { 140 type resultWithTime struct { 141 result *health.HealthCheckResult 142 time time.Time 143 } 144 145 // Run checks 146 resultsWithTimes := make([]resultWithTime, 0, len(h.source.Checks)) 147 for _, check := range h.source.Checks { 148 // run check before assigning to assure that the "time.Now()" value reflects when check was completed (rather than when it was started) 149 checkVal := check(ctx) 150 resultsWithTimes = append(resultsWithTimes, resultWithTime{ 151 time: time.Now(), 152 result: checkVal, 153 }) 154 } 155 156 // Update cached state 157 h.mutex.Lock() 158 defer h.mutex.Unlock() 159 for _, resultWithTime := range resultsWithTimes { 160 newState := &checkState{ 161 lastResult: resultWithTime.result, 162 lastResultTime: resultWithTime.time, 163 } 164 // populate last success state from previous state (if present) 165 if previousState, ok := h.checkStates[resultWithTime.result.Type]; ok { 166 newState.lastSuccess = previousState.lastSuccess 167 newState.lastSuccessTime = previousState.lastSuccessTime 168 } 169 // if current result is successful, update success state 170 if resultWithTime.result.State == health.HealthStateHealthy { 171 newState.lastSuccess = resultWithTime.result 172 newState.lastSuccessTime = resultWithTime.time 173 } 174 h.checkStates[resultWithTime.result.Type] = newState 175 } 176 } 177 178 func toHealthStatus(results []health.HealthCheckResult) health.HealthStatus { 179 checks := make(map[health.CheckType]health.HealthCheckResult, len(results)) 180 for _, result := range results { 181 checks[result.Type] = result 182 } 183 return health.HealthStatus{ 184 Checks: checks, 185 } 186 } 187 188 func newDefaultHealthCheckSource(checkType health.CheckType, poll func() error) Source { 189 return Source{ 190 Checks: map[health.CheckType]CheckFunc{ 191 checkType: func(ctx context.Context) *health.HealthCheckResult { 192 err := poll() 193 if err != nil { 194 return &health.HealthCheckResult{ 195 Type: checkType, 196 State: health.HealthStateError, 197 Message: stringPtr(err.Error()), 198 } 199 } 200 return &health.HealthCheckResult{ 201 Type: checkType, 202 State: health.HealthStateHealthy, 203 } 204 }, 205 }, 206 } 207 } 208 209 func wrap(baseStringPtr *string, prependStr string) string { 210 if baseStringPtr == nil { 211 return prependStr 212 } 213 return prependStr + ": " + *baseStringPtr 214 } 215 216 func stringPtr(s string) *string { 217 return &s 218 }