github.com/splucs/witchcraft-go-server@v1.7.0/status/health/periodic/source.go (about)

     1  // Copyright (c) 2018 Palantir Technologies. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package periodic
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/palantir/witchcraft-go-logging/wlog/wapp"
    24  	"github.com/palantir/witchcraft-go-server/conjure/witchcraft/api/health"
    25  	"github.com/palantir/witchcraft-go-server/status"
    26  )
    27  
// CheckFunc runs a single health check and returns its result. The periodic poller invokes it with the context that
// was supplied when the health check source was created.
type CheckFunc func(ctx context.Context) *health.HealthCheckResult
    29  
// Source is the set of health checks that a periodic health check source polls.
type Source struct {
	// Checks maps each check type to the function that is invoked on every poll to produce its result.
	Checks map[health.CheckType]CheckFunc
}
    33  
// checkState is the cached outcome of polling a single check.
type checkState struct {
	// lastResult is the most recent result returned by the check, regardless of its state.
	lastResult *health.HealthCheckResult
	// lastResultTime is the time at which lastResult was obtained.
	lastResultTime time.Time
	// lastSuccess is the most recent result whose state was HEALTHY; it is nil until the check has succeeded once.
	lastSuccess *health.HealthCheckResult
	// lastSuccessTime is the time at which lastSuccess was obtained; it is the zero time until the first success.
	lastSuccessTime time.Time
}
    40  
// healthCheckSource polls the checks of a Source in the background and serves health status from the cached results.
type healthCheckSource struct {
	// static
	// source holds the checks that are polled.
	source Source
	// gracePeriod is how long a cached success (or, failing that, a cached result) is considered current when
	// computing the health status.
	gracePeriod time.Duration
	// retryInterval is the period of the ticker that triggers each poll.
	retryInterval time.Duration
	// initialPoll, when true, makes the poll loop run one poll immediately instead of waiting for the first tick.
	initialPoll bool

	// mutable
	// mutex guards checkStates.
	mutex sync.RWMutex
	// checkStates caches the latest state per check, keyed by the Type of the result the check returned.
	checkStates map[health.CheckType]*checkState
}
    52  
    53  // NewHealthCheckSource creates a health check source that calls poll every retryInterval in a goroutine. The goroutine
    54  // is cancelled if ctx is cancelled. If gracePeriod elapses without poll returning nil, the returned health check
    55  // source will give a health status of error. checkType is the key to be used in the health result returned by the
    56  // health check source.
    57  func NewHealthCheckSource(ctx context.Context, gracePeriod time.Duration, retryInterval time.Duration, checkType health.CheckType, poll func() error, options ...Option) status.HealthCheckSource {
    58  	return FromHealthCheckSource(ctx, gracePeriod, retryInterval, newDefaultHealthCheckSource(checkType, poll), options...)
    59  }
    60  
    61  // FromHealthCheckSource creates a health check source that calls the the provided Source.Checks functions every
    62  // retryInterval in a goroutine. The goroutine is cancelled if ctx is cancelled. For each check, if gracePeriod elapses
    63  // without CheckFunc returning HEALTHY, the returned health check source's HealthStatus will return a HealthCheckResult
    64  // of error.
    65  func FromHealthCheckSource(ctx context.Context, gracePeriod time.Duration, retryInterval time.Duration, source Source, options ...Option) status.HealthCheckSource {
    66  	checker := &healthCheckSource{
    67  		source:        source,
    68  		gracePeriod:   gracePeriod,
    69  		retryInterval: retryInterval,
    70  		checkStates:   map[health.CheckType]*checkState{},
    71  	}
    72  	for _, option := range options {
    73  		option.apply(checker)
    74  	}
    75  	go wapp.RunWithRecoveryLogging(ctx, checker.runPoll)
    76  	return checker
    77  }
    78  
    79  func (h *healthCheckSource) HealthStatus(ctx context.Context) health.HealthStatus {
    80  	h.mutex.RLock()
    81  	defer h.mutex.RUnlock()
    82  
    83  	results := make([]health.HealthCheckResult, 0, len(h.source.Checks))
    84  	for checkType := range h.source.Checks {
    85  		checkState, ok := h.checkStates[checkType]
    86  		if !ok {
    87  			results = append(results, health.HealthCheckResult{
    88  				Type:    checkType,
    89  				State:   health.HealthStateRepairing,
    90  				Message: stringPtr("Check has not yet run"),
    91  			})
    92  			continue
    93  		}
    94  		var result health.HealthCheckResult
    95  		switch {
    96  		case time.Since(checkState.lastSuccessTime) <= h.gracePeriod:
    97  			result = *checkState.lastSuccess
    98  		case time.Since(checkState.lastResultTime) <= h.gracePeriod:
    99  			result = *checkState.lastResult
   100  			result.Message = stringPtr(wrap(result.Message, fmt.Sprintf("No successful checks during %s grace period", h.gracePeriod.String())))
   101  		default:
   102  			result = *checkState.lastResult
   103  			result.Message = stringPtr(wrap(result.Message, fmt.Sprintf("No completed checks during %s grace period", h.gracePeriod.String())))
   104  			// Mark REPAIRING if we were healthy before expiration.
   105  			if result.State == health.HealthStateHealthy {
   106  				result.State = health.HealthStateRepairing
   107  			}
   108  		}
   109  		results = append(results, result)
   110  	}
   111  
   112  	return toHealthStatus(results)
   113  }
   114  
   115  func (h *healthCheckSource) runPoll(ctx context.Context) {
   116  	ticker := time.NewTicker(h.retryInterval)
   117  	defer ticker.Stop()
   118  	if h.initialPoll {
   119  		h.doPoll(ctx)
   120  	}
   121  	for {
   122  		select {
   123  		case <-ctx.Done():
   124  			return
   125  		case <-ticker.C:
   126  			// ensure that doPoll is not called if context is cancelled (without this, if ctx.Done() and ticker.C fire
   127  			// at the same time and the ticker.C case is selected at the top-level, doPoll may be called even though the
   128  			// context is done).
   129  			select {
   130  			case <-ctx.Done():
   131  				return
   132  			default:
   133  			}
   134  			h.doPoll(ctx)
   135  		}
   136  	}
   137  }
   138  
   139  func (h *healthCheckSource) doPoll(ctx context.Context) {
   140  	type resultWithTime struct {
   141  		result *health.HealthCheckResult
   142  		time   time.Time
   143  	}
   144  
   145  	// Run checks
   146  	resultsWithTimes := make([]resultWithTime, 0, len(h.source.Checks))
   147  	for _, check := range h.source.Checks {
   148  		// run check before assigning to assure that the "time.Now()" value reflects when check was completed (rather than when it was started)
   149  		checkVal := check(ctx)
   150  		resultsWithTimes = append(resultsWithTimes, resultWithTime{
   151  			time:   time.Now(),
   152  			result: checkVal,
   153  		})
   154  	}
   155  
   156  	// Update cached state
   157  	h.mutex.Lock()
   158  	defer h.mutex.Unlock()
   159  	for _, resultWithTime := range resultsWithTimes {
   160  		newState := &checkState{
   161  			lastResult:     resultWithTime.result,
   162  			lastResultTime: resultWithTime.time,
   163  		}
   164  		// populate last success state from previous state (if present)
   165  		if previousState, ok := h.checkStates[resultWithTime.result.Type]; ok {
   166  			newState.lastSuccess = previousState.lastSuccess
   167  			newState.lastSuccessTime = previousState.lastSuccessTime
   168  		}
   169  		// if current result is successful, update success state
   170  		if resultWithTime.result.State == health.HealthStateHealthy {
   171  			newState.lastSuccess = resultWithTime.result
   172  			newState.lastSuccessTime = resultWithTime.time
   173  		}
   174  		h.checkStates[resultWithTime.result.Type] = newState
   175  	}
   176  }
   177  
   178  func toHealthStatus(results []health.HealthCheckResult) health.HealthStatus {
   179  	checks := make(map[health.CheckType]health.HealthCheckResult, len(results))
   180  	for _, result := range results {
   181  		checks[result.Type] = result
   182  	}
   183  	return health.HealthStatus{
   184  		Checks: checks,
   185  	}
   186  }
   187  
   188  func newDefaultHealthCheckSource(checkType health.CheckType, poll func() error) Source {
   189  	return Source{
   190  		Checks: map[health.CheckType]CheckFunc{
   191  			checkType: func(ctx context.Context) *health.HealthCheckResult {
   192  				err := poll()
   193  				if err != nil {
   194  					return &health.HealthCheckResult{
   195  						Type:    checkType,
   196  						State:   health.HealthStateError,
   197  						Message: stringPtr(err.Error()),
   198  					}
   199  				}
   200  				return &health.HealthCheckResult{
   201  					Type:  checkType,
   202  					State: health.HealthStateHealthy,
   203  				}
   204  			},
   205  		},
   206  	}
   207  }
   208  
   209  func wrap(baseStringPtr *string, prependStr string) string {
   210  	if baseStringPtr == nil {
   211  		return prependStr
   212  	}
   213  	return prependStr + ": " + *baseStringPtr
   214  }
   215  
   216  func stringPtr(s string) *string {
   217  	return &s
   218  }