github.com/freiheit-com/kuberpult@v1.24.2-0.20240328135542-315d5630abe6/pkg/setup/health.go (about) 1 /*This file is part of kuberpult. 2 3 Kuberpult is free software: you can redistribute it and/or modify 4 it under the terms of the Expat(MIT) License as published by 5 the Free Software Foundation. 6 7 Kuberpult is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 MIT License for more details. 11 12 You should have received a copy of the MIT License 13 along with kuberpult. If not, see <https://directory.fsf.org/wiki/License:Expat>. 14 15 Copyright 2023 freiheit.com*/ 16 17 // Setup implementation shared between all microservices. 18 // If this file is changed it will affect _all_ microservices in the monorepo (and this 19 // is deliberately so). 20 package setup 21 22 import ( 23 "context" 24 "encoding/json" 25 "errors" 26 "fmt" 27 "net/http" 28 "sync" 29 "time" 30 31 "github.com/cenkalti/backoff/v4" 32 ) 33 34 type Health uint 35 36 const ( 37 HealthStarting Health = iota 38 HealthReady 39 HealthBackoff 40 HealthFailed 41 ) 42 43 func (h Health) String() string { 44 switch h { 45 case HealthStarting: 46 return "starting" 47 case HealthReady: 48 return "ready" 49 case HealthBackoff: 50 return "backoff" 51 case HealthFailed: 52 return "failed" 53 } 54 return "unknown" 55 } 56 57 func (h Health) MarshalJSON() ([]byte, error) { 58 return json.Marshal(h.String()) 59 } 60 61 type HealthReporter struct { 62 server *HealthServer 63 name string 64 backoff backoff.BackOff 65 } 66 67 type report struct { 68 Health Health `json:"health"` 69 Message string `json:"message,omitempty"` 70 71 // a nil Deadline is interpreted as "valid forever" 72 Deadline *time.Time `json:"deadline,omitempty"` 73 } 74 75 func (r *report) isReady(now time.Time) bool { 76 if r.Health != HealthReady { 77 return false 78 } 79 if r.Deadline == nil { 80 return true 81 } 82 return now.Before(*r.Deadline) 83 } 84 85 func (r *HealthReporter) ReportReady(message string) { 86 r.ReportHealth(HealthReady, message) 87 } 88 89 func (r *HealthReporter) ReportHealth(health Health, message string) { 90 r.ReportHealthTtl(health, message, nil) 91 } 92 93 // ReportHealthTtl returns the deadline (for testing) 94 func (r *HealthReporter) ReportHealthTtl(health Health, message string, ttl *time.Duration) *time.Time { 95 if r == nil { 96 return nil 97 } 98 if health == HealthReady { 99 r.backoff.Reset() 100 } 101 r.server.mx.Lock() 102 defer r.server.mx.Unlock() 103 if r.server.parts == nil { 104 r.server.parts = map[string]report{} 105 } 106 var deadline *time.Time 107 if ttl != nil { 108 dl := r.server.now().Add(*ttl) 109 deadline = &dl 110 } 111 r.server.parts[r.name] = report{ 112 Health: health, 113 Message: message, 114 Deadline: deadline, 115 } 116 return deadline 117 } 118 119 /* 120 Retry allows background services to set up reliable streaming with backoff. 121 122 This can be used to create background tasks that look like this: 123 124 func Consume(ctx context.Context, hr *setup.HealthReporter) error { 125 state := initState() 126 return hr.Retry(ctx, func() error { 127 stream, err := startConsumer() 128 if err != nil { 129 return err 130 } 131 hr.ReportReady("receiving") 132 for { 133 select { 134 case <-ctx.Done(): return nil 135 case ev := <-stream: handleEvent(state, event) 136 } 137 } 138 }) 139 } 140 141 In the example above, connecting to the consumer will be retried a few times with backoff. 142 The number of retries is reset whenever ReportReady is called so that successful connection heal the service. 143 */ 144 func (r *HealthReporter) Retry(ctx context.Context, fn func() error) error { 145 bo := r.backoff 146 for { 147 err := fn() 148 select { 149 case <-ctx.Done(): 150 return err 151 default: 152 } 153 if err != nil { 154 var perr *backoff.PermanentError 155 if errors.As(err, &perr) { 156 return perr.Unwrap() 157 } 158 r.ReportHealth(HealthBackoff, err.Error()) 159 } else { 160 r.ReportHealth(HealthBackoff, "") 161 } 162 next := bo.NextBackOff() 163 if next == backoff.Stop { 164 return err 165 } 166 select { 167 case <-ctx.Done(): 168 return nil 169 case <-time.After(next): 170 continue 171 } 172 } 173 } 174 175 type HealthServer struct { 176 parts map[string]report 177 mx sync.Mutex 178 BackOffFactory func() backoff.BackOff 179 Clock func() time.Time 180 } 181 182 func (h *HealthServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { 183 reports := h.reports() 184 success := true 185 for _, r := range reports { 186 if !r.isReady(h.now()) { 187 success = false 188 } 189 } 190 body, err := json.Marshal(reports) 191 if err != nil { 192 panic(err) 193 } 194 w.Header().Set("Content-Length", fmt.Sprintf("%d", len(body))) 195 if success { 196 w.WriteHeader(http.StatusOK) 197 } else { 198 w.WriteHeader(http.StatusInternalServerError) 199 } 200 fmt.Fprint(w, string(body)) 201 } 202 203 func (h *HealthServer) IsReady(name string) bool { 204 h.mx.Lock() 205 defer h.mx.Unlock() 206 if h.parts == nil { 207 return false 208 } 209 report := h.parts[name] 210 return report.isReady(h.now()) 211 } 212 213 func (h *HealthServer) reports() map[string]report { 214 h.mx.Lock() 215 defer h.mx.Unlock() 216 result := make(map[string]report, len(h.parts)) 217 for k, v := range h.parts { 218 result[k] = v 219 } 220 return result 221 } 222 223 func (h *HealthServer) now() time.Time { 224 if h.Clock != nil { 225 return h.Clock() 226 } 227 return time.Now() 228 } 229 230 func (h *HealthServer) Reporter(name string) *HealthReporter { 231 var bo backoff.BackOff 232 if h.BackOffFactory != nil { 233 bo = h.BackOffFactory() 234 } else { 235 bo = backoff.NewExponentialBackOff() 236 } 237 r := &HealthReporter{ 238 server: h, 239 name: name, 240 backoff: bo, 241 } 242 r.ReportHealth(HealthStarting, "starting") 243 return r 244 } 245 246 func Permanent(err error) error { 247 return backoff.Permanent(err) 248 } 249 250 var ( 251 _ http.Handler = (*HealthServer)(nil) 252 )