github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/services/health/health.go (about)

     1  package health
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/authzed/grpcutil"
     8  	"github.com/cenkalti/backoff/v4"
     9  	healthpb "google.golang.org/grpc/health/grpc_health_v1"
    10  
    11  	"github.com/authzed/spicedb/internal/dispatch"
    12  	log "github.com/authzed/spicedb/internal/logging"
    13  	"github.com/authzed/spicedb/pkg/datastore"
    14  )
    15  
// datastoreReadyTimeout bounds how long a single datastore readiness check in
// checkIsReady may run before its context is canceled.
const datastoreReadyTimeout = time.Millisecond * 500
    17  
    18  // NewHealthManager creates and returns a new health manager that checks the IsReady
    19  // status of the given dispatcher and datastore checker and sets the health check to
    20  // return healthy once both have gone to true.
    21  func NewHealthManager(dispatcher dispatch.Dispatcher, dsc DatastoreChecker) Manager {
    22  	healthSvc := grpcutil.NewAuthlessHealthServer()
    23  	return &healthManager{healthSvc, dispatcher, dsc, map[string]struct{}{}}
    24  }
    25  
// DatastoreChecker is an interface for determining if the datastore is ready for
// traffic. The health manager calls it with a context that carries a deadline.
type DatastoreChecker interface {
	// ReadyState returns whether the datastore is ready to be used, or an error
	// if readiness could not be determined.
	ReadyState(ctx context.Context) (datastore.ReadyState, error)
}
    32  
// Manager is a system which manages the health service statuses.
type Manager interface {
	// RegisterReportedService registers the name of a service, hosted on the
	// same server, whose health status is managed by this manager.
	RegisterReportedService(serviceName string)

	// HealthSvc is the health service this manager is managing.
	HealthSvc() *grpcutil.AuthlessHealthServer

	// Checker returns a function that can be run via an errgroup to perform the health checks.
	Checker(ctx context.Context) func() error
}
    45  
// healthManager is the Manager implementation returned by NewHealthManager.
type healthManager struct {
	healthSvc    *grpcutil.AuthlessHealthServer // health server whose per-service statuses are set
	dispatcher   dispatch.Dispatcher            // dispatcher whose ReadyState is consulted
	dsc          DatastoreChecker               // source of the datastore's ReadyState
	serviceNames map[string]struct{}            // set of service names registered for reporting
}
    52  
// HealthSvc returns the health server held by this manager.
func (hm *healthManager) HealthSvc() *grpcutil.AuthlessHealthServer {
	return hm.healthSvc
}
    56  
    57  func (hm *healthManager) RegisterReportedService(serviceName string) {
    58  	hm.serviceNames[serviceName] = struct{}{}
    59  	hm.healthSvc.Server.SetServingStatus(serviceName, healthpb.HealthCheckResponse_NOT_SERVING)
    60  }
    61  
    62  func (hm *healthManager) Checker(ctx context.Context) func() error {
    63  	return func() error {
    64  		// Run immediately for the initial check
    65  		backoffInterval := backoff.NewExponentialBackOff()
    66  		backoffInterval.MaxElapsedTime = 0
    67  
    68  		ticker := time.After(0)
    69  
    70  		for {
    71  			select {
    72  			case _, ok := <-ticker:
    73  				if !ok {
    74  					log.Ctx(ctx).Warn().Msg("backoff error while waiting for dispatcher or datastore health")
    75  					return nil
    76  				}
    77  
    78  			case <-ctx.Done():
    79  				log.Ctx(ctx).Info().Msg("datastore health check canceled")
    80  				return nil
    81  			}
    82  
    83  			isReady := hm.checkIsReady(ctx)
    84  			if isReady {
    85  				for serviceName := range hm.serviceNames {
    86  					hm.healthSvc.Server.SetServingStatus(serviceName, healthpb.HealthCheckResponse_SERVING)
    87  				}
    88  				return nil
    89  			}
    90  
    91  			nextPush := backoffInterval.NextBackOff()
    92  			if nextPush == backoff.Stop {
    93  				log.Ctx(ctx).Warn().Msg("exceed max attempts to check for dispatch or datastore ready")
    94  				return nil
    95  			}
    96  			ticker = time.After(nextPush)
    97  		}
    98  	}
    99  }
   100  
   101  func (hm *healthManager) checkIsReady(ctx context.Context) bool {
   102  	log.Ctx(ctx).Debug().Msg("checking if datastore and dispatcher are ready")
   103  
   104  	ctx, cancel := context.WithTimeout(ctx, datastoreReadyTimeout)
   105  	defer cancel()
   106  
   107  	dsReady, err := hm.dsc.ReadyState(ctx)
   108  	if err != nil {
   109  		log.Ctx(ctx).Warn().Err(err).Msg("could not check if the datastore was ready")
   110  	}
   111  
   112  	if !dsReady.IsReady {
   113  		log.Ctx(ctx).Warn().Bool("datastoreReady", false).Msgf("datastore failed readiness checks: %s", dsReady.Message)
   114  		return false
   115  	}
   116  
   117  	dispatchReady := hm.dispatcher.ReadyState()
   118  	if !dispatchReady.IsReady {
   119  		log.Ctx(ctx).Warn().Bool("dispatchReady", false).Msgf("dispatcher failed readiness checks: %s", dispatchReady.Message)
   120  		return false
   121  	}
   122  
   123  	log.Ctx(ctx).Debug().Bool("datastoreReady", true).Bool("dispatchReady", true).Msg("completed dispatcher and datastore readiness checks")
   124  	return true
   125  }