github.com/freiheit-com/kuberpult@v1.24.2-0.20240328135542-315d5630abe6/pkg/setup/health.go (about)

     1  /*This file is part of kuberpult.
     2  
     3  Kuberpult is free software: you can redistribute it and/or modify
     4  it under the terms of the Expat(MIT) License as published by
     5  the Free Software Foundation.
     6  
     7  Kuberpult is distributed in the hope that it will be useful,
     8  but WITHOUT ANY WARRANTY; without even the implied warranty of
     9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    10  MIT License for more details.
    11  
    12  You should have received a copy of the MIT License
    13  along with kuberpult. If not, see <https://directory.fsf.org/wiki/License:Expat>.
    14  
    15  Copyright 2023 freiheit.com*/
    16  
    17  // Setup implementation shared between all microservices.
    18  // If this file is changed it will affect _all_ microservices in the monorepo (and this
    19  // is deliberately so).
    20  package setup
    21  
    22  import (
    23  	"context"
    24  	"encoding/json"
    25  	"errors"
    26  	"fmt"
    27  	"net/http"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/cenkalti/backoff/v4"
    32  )
    33  
    34  type Health uint
    35  
    36  const (
    37  	HealthStarting Health = iota
    38  	HealthReady
    39  	HealthBackoff
    40  	HealthFailed
    41  )
    42  
    43  func (h Health) String() string {
    44  	switch h {
    45  	case HealthStarting:
    46  		return "starting"
    47  	case HealthReady:
    48  		return "ready"
    49  	case HealthBackoff:
    50  		return "backoff"
    51  	case HealthFailed:
    52  		return "failed"
    53  	}
    54  	return "unknown"
    55  }
    56  
    57  func (h Health) MarshalJSON() ([]byte, error) {
    58  	return json.Marshal(h.String())
    59  }
    60  
// HealthReporter publishes the health of one named component to a
// HealthServer and carries the backoff used by Retry.
type HealthReporter struct {
	// server is the HealthServer whose parts map receives this reporter's reports.
	server *HealthServer
	// name is the key under which the reports are stored on the server.
	name string
	// backoff supplies the wait time between attempts in Retry and is reset
	// whenever HealthReady is reported.
	backoff backoff.BackOff
}
    66  
// report is the most recent health statement of a single component, in the
// form in which it is stored by the server and serialized to JSON.
type report struct {
	// Health is the reported state.
	Health Health `json:"health"`
	// Message is free text supplied by the reporter; omitted from JSON when empty.
	Message string `json:"message,omitempty"`

	// a nil Deadline is interpreted as "valid forever"
	Deadline *time.Time `json:"deadline,omitempty"`
}
    74  
    75  func (r *report) isReady(now time.Time) bool {
    76  	if r.Health != HealthReady {
    77  		return false
    78  	}
    79  	if r.Deadline == nil {
    80  		return true
    81  	}
    82  	return now.Before(*r.Deadline)
    83  }
    84  
// ReportReady records HealthReady with the given message and no deadline.
// It is shorthand for ReportHealth(HealthReady, message).
func (r *HealthReporter) ReportReady(message string) {
	r.ReportHealth(HealthReady, message)
}
    88  
// ReportHealth records health and message for this reporter without a
// deadline; it calls ReportHealthTtl with a nil ttl and discards the result.
func (r *HealthReporter) ReportHealth(health Health, message string) {
	r.ReportHealthTtl(health, message, nil)
}
    92  
    93  // ReportHealthTtl returns the deadline (for testing)
    94  func (r *HealthReporter) ReportHealthTtl(health Health, message string, ttl *time.Duration) *time.Time {
    95  	if r == nil {
    96  		return nil
    97  	}
    98  	if health == HealthReady {
    99  		r.backoff.Reset()
   100  	}
   101  	r.server.mx.Lock()
   102  	defer r.server.mx.Unlock()
   103  	if r.server.parts == nil {
   104  		r.server.parts = map[string]report{}
   105  	}
   106  	var deadline *time.Time
   107  	if ttl != nil {
   108  		dl := r.server.now().Add(*ttl)
   109  		deadline = &dl
   110  	}
   111  	r.server.parts[r.name] = report{
   112  		Health:   health,
   113  		Message:  message,
   114  		Deadline: deadline,
   115  	}
   116  	return deadline
   117  }
   118  
   119  /*
   120  Retry allows background services to set up reliable streaming with backoff.
   121  
   122  This can be used to create background tasks that look like this:
   123  
   124  	func Consume(ctx context.Context, hr *setup.HealthReporter) error {
   125  		state := initState()
   126  		return hr.Retry(ctx, func() error {
   127  			stream, err := startConsumer()
   128  			if err != nil {
   129  				return err
   130  			}
   131  			hr.ReportReady("receiving")
   132  			for {
   133  				select {
   134  				case <-ctx.Done(): return nil
   135  				case ev := <-stream: handleEvent(state, event)
   136  				}
   137  			}
   138  	  })
   139  	}
   140  
   141  In the example above, connecting to  the consumer will be retried a few times with backoff.
   142  The number of retries is reset whenever ReportReady is called so that successful connection heal the service.
   143  */
   144  func (r *HealthReporter) Retry(ctx context.Context, fn func() error) error {
   145  	bo := r.backoff
   146  	for {
   147  		err := fn()
   148  		select {
   149  		case <-ctx.Done():
   150  			return err
   151  		default:
   152  		}
   153  		if err != nil {
   154  			var perr *backoff.PermanentError
   155  			if errors.As(err, &perr) {
   156  				return perr.Unwrap()
   157  			}
   158  			r.ReportHealth(HealthBackoff, err.Error())
   159  		} else {
   160  			r.ReportHealth(HealthBackoff, "")
   161  		}
   162  		next := bo.NextBackOff()
   163  		if next == backoff.Stop {
   164  			return err
   165  		}
   166  		select {
   167  		case <-ctx.Done():
   168  			return nil
   169  		case <-time.After(next):
   170  			continue
   171  		}
   172  	}
   173  }
   174  
// HealthServer collects the latest report of every named reporter and serves
// them over HTTP. All fields may be left at their zero value.
type HealthServer struct {
	// parts maps a reporter name to its latest report; it is created lazily
	// by the first report.
	parts map[string]report
	// mx is held around the accesses to parts.
	mx sync.Mutex
	// BackOffFactory creates the backoff for each new reporter. When nil,
	// Reporter uses backoff.NewExponentialBackOff.
	BackOffFactory func() backoff.BackOff
	// Clock returns the current time. When nil, time.Now is used.
	Clock func() time.Time
}
   181  
   182  func (h *HealthServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   183  	reports := h.reports()
   184  	success := true
   185  	for _, r := range reports {
   186  		if !r.isReady(h.now()) {
   187  			success = false
   188  		}
   189  	}
   190  	body, err := json.Marshal(reports)
   191  	if err != nil {
   192  		panic(err)
   193  	}
   194  	w.Header().Set("Content-Length", fmt.Sprintf("%d", len(body)))
   195  	if success {
   196  		w.WriteHeader(http.StatusOK)
   197  	} else {
   198  		w.WriteHeader(http.StatusInternalServerError)
   199  	}
   200  	fmt.Fprint(w, string(body))
   201  }
   202  
   203  func (h *HealthServer) IsReady(name string) bool {
   204  	h.mx.Lock()
   205  	defer h.mx.Unlock()
   206  	if h.parts == nil {
   207  		return false
   208  	}
   209  	report := h.parts[name]
   210  	return report.isReady(h.now())
   211  }
   212  
   213  func (h *HealthServer) reports() map[string]report {
   214  	h.mx.Lock()
   215  	defer h.mx.Unlock()
   216  	result := make(map[string]report, len(h.parts))
   217  	for k, v := range h.parts {
   218  		result[k] = v
   219  	}
   220  	return result
   221  }
   222  
   223  func (h *HealthServer) now() time.Time {
   224  	if h.Clock != nil {
   225  		return h.Clock()
   226  	}
   227  	return time.Now()
   228  }
   229  
   230  func (h *HealthServer) Reporter(name string) *HealthReporter {
   231  	var bo backoff.BackOff
   232  	if h.BackOffFactory != nil {
   233  		bo = h.BackOffFactory()
   234  	} else {
   235  		bo = backoff.NewExponentialBackOff()
   236  	}
   237  	r := &HealthReporter{
   238  		server:  h,
   239  		name:    name,
   240  		backoff: bo,
   241  	}
   242  	r.ReportHealth(HealthStarting, "starting")
   243  	return r
   244  }
   245  
// Permanent wraps err using backoff.Permanent. When the function passed to
// Retry returns such an error, Retry stops immediately and returns the
// unwrapped error instead of retrying.
func Permanent(err error) error {
	return backoff.Permanent(err)
}
   249  
// Compile-time check that *HealthServer implements http.Handler.
var (
	_ http.Handler = (*HealthServer)(nil)
)