github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/services/health/health.go (about) 1 package health 2 3 import ( 4 "context" 5 "time" 6 7 "github.com/authzed/grpcutil" 8 "github.com/cenkalti/backoff/v4" 9 healthpb "google.golang.org/grpc/health/grpc_health_v1" 10 11 "github.com/authzed/spicedb/internal/dispatch" 12 log "github.com/authzed/spicedb/internal/logging" 13 "github.com/authzed/spicedb/pkg/datastore" 14 ) 15 16 const datastoreReadyTimeout = time.Millisecond * 500 17 18 // NewHealthManager creates and returns a new health manager that checks the IsReady 19 // status of the given dispatcher and datastore checker and sets the health check to 20 // return healthy once both have gone to true. 21 func NewHealthManager(dispatcher dispatch.Dispatcher, dsc DatastoreChecker) Manager { 22 healthSvc := grpcutil.NewAuthlessHealthServer() 23 return &healthManager{healthSvc, dispatcher, dsc, map[string]struct{}{}} 24 } 25 26 // DatastoreChecker is an interface for determining if the datastore is ready for 27 // traffic. 28 type DatastoreChecker interface { 29 // ReadyState returns whether the datastore is ready to be used. 30 ReadyState(ctx context.Context) (datastore.ReadyState, error) 31 } 32 33 // Manager is a system which manages the health service statuses. 34 type Manager interface { 35 // RegisterReportedService registers the name of service under the same server 36 // for whom the health is being managed by this manager. 37 RegisterReportedService(serviceName string) 38 39 // HealthSvc is the health service this manager is managing. 40 HealthSvc() *grpcutil.AuthlessHealthServer 41 42 // Checker returns a function that can be run via an errgroup to perform the health checks. 43 Checker(ctx context.Context) func() error 44 } 45 46 type healthManager struct { 47 healthSvc *grpcutil.AuthlessHealthServer 48 dispatcher dispatch.Dispatcher 49 dsc DatastoreChecker 50 serviceNames map[string]struct{} 51 } 52 53 func (hm *healthManager) HealthSvc() *grpcutil.AuthlessHealthServer { 54 return hm.healthSvc 55 } 56 57 func (hm *healthManager) RegisterReportedService(serviceName string) { 58 hm.serviceNames[serviceName] = struct{}{} 59 hm.healthSvc.Server.SetServingStatus(serviceName, healthpb.HealthCheckResponse_NOT_SERVING) 60 } 61 62 func (hm *healthManager) Checker(ctx context.Context) func() error { 63 return func() error { 64 // Run immediately for the initial check 65 backoffInterval := backoff.NewExponentialBackOff() 66 backoffInterval.MaxElapsedTime = 0 67 68 ticker := time.After(0) 69 70 for { 71 select { 72 case _, ok := <-ticker: 73 if !ok { 74 log.Ctx(ctx).Warn().Msg("backoff error while waiting for dispatcher or datastore health") 75 return nil 76 } 77 78 case <-ctx.Done(): 79 log.Ctx(ctx).Info().Msg("datastore health check canceled") 80 return nil 81 } 82 83 isReady := hm.checkIsReady(ctx) 84 if isReady { 85 for serviceName := range hm.serviceNames { 86 hm.healthSvc.Server.SetServingStatus(serviceName, healthpb.HealthCheckResponse_SERVING) 87 } 88 return nil 89 } 90 91 nextPush := backoffInterval.NextBackOff() 92 if nextPush == backoff.Stop { 93 log.Ctx(ctx).Warn().Msg("exceed max attempts to check for dispatch or datastore ready") 94 return nil 95 } 96 ticker = time.After(nextPush) 97 } 98 } 99 } 100 101 func (hm *healthManager) checkIsReady(ctx context.Context) bool { 102 log.Ctx(ctx).Debug().Msg("checking if datastore and dispatcher are ready") 103 104 ctx, cancel := context.WithTimeout(ctx, datastoreReadyTimeout) 105 defer cancel() 106 107 dsReady, err := hm.dsc.ReadyState(ctx) 108 if err != nil { 109 log.Ctx(ctx).Warn().Err(err).Msg("could not check if the datastore was ready") 110 } 111 112 if !dsReady.IsReady { 113 log.Ctx(ctx).Warn().Bool("datastoreReady", false).Msgf("datastore failed readiness checks: %s", dsReady.Message) 114 return false 115 } 116 117 dispatchReady := hm.dispatcher.ReadyState() 118 if !dispatchReady.IsReady { 119 log.Ctx(ctx).Warn().Bool("dispatchReady", false).Msgf("dispatcher failed readiness checks: %s", dispatchReady.Message) 120 return false 121 } 122 123 log.Ctx(ctx).Debug().Bool("datastoreReady", true).Bool("dispatchReady", true).Msg("completed dispatcher and datastore readiness checks") 124 return true 125 }