github.com/splucs/witchcraft-go-server@v1.7.0/status/health/periodic/source_test.go (about)

     1  // Copyright (c) 2019 Palantir Technologies. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package periodic
    16  
    17  import (
    18  	"context"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/palantir/witchcraft-go-server/conjure/witchcraft/api/health"
    23  	"github.com/stretchr/testify/assert"
    24  )
    25  
    26  const (
    27  	checkType      = "TEST_CHECK"
    28  	otherCheckType = "OTHER_TEST_CHECK"
    29  )
    30  
    31  func TestHealthCheckSource_HealthStatus(t *testing.T) {
    32  	for _, test := range []struct {
    33  		Name     string
    34  		State    *healthCheckSource
    35  		Expected health.HealthStatus
    36  	}{
    37  		{
    38  			Name: "Last result successful",
    39  			State: &healthCheckSource{
    40  				source: Source{
    41  					Checks: map[health.CheckType]CheckFunc{
    42  						checkType: nil,
    43  					},
    44  				},
    45  				gracePeriod: time.Minute,
    46  				checkStates: map[health.CheckType]*checkState{
    47  					checkType: {
    48  						lastResult: &health.HealthCheckResult{
    49  							Type:  checkType,
    50  							State: health.HealthStateHealthy,
    51  						},
    52  						lastResultTime: time.Now(),
    53  						lastSuccess: &health.HealthCheckResult{
    54  							Type:  checkType,
    55  							State: health.HealthStateHealthy,
    56  						},
    57  						lastSuccessTime: time.Now(),
    58  					},
    59  				},
    60  			},
    61  			Expected: health.HealthStatus{
    62  				Checks: map[health.CheckType]health.HealthCheckResult{
    63  					checkType: {
    64  						Type:  checkType,
    65  						State: health.HealthStateHealthy,
    66  					},
    67  				},
    68  			},
    69  		},
    70  		{
    71  			Name: "Last success within grace period",
    72  			State: &healthCheckSource{
    73  				source: Source{
    74  					Checks: map[health.CheckType]CheckFunc{
    75  						checkType: nil,
    76  					},
    77  				},
    78  				gracePeriod: time.Hour,
    79  				checkStates: map[health.CheckType]*checkState{
    80  					checkType: {
    81  						lastResult: &health.HealthCheckResult{
    82  							Type:  checkType,
    83  							State: health.HealthStateError,
    84  						},
    85  						lastResultTime: time.Now(),
    86  						lastSuccess: &health.HealthCheckResult{
    87  							Type:  checkType,
    88  							State: health.HealthStateHealthy,
    89  						},
    90  						lastSuccessTime: time.Now().Add(-5 * time.Minute),
    91  					},
    92  				},
    93  			},
    94  			Expected: health.HealthStatus{
    95  				Checks: map[health.CheckType]health.HealthCheckResult{
    96  					checkType: {
    97  						Type:  checkType,
    98  						State: health.HealthStateHealthy,
    99  					},
   100  				},
   101  			},
   102  		},
   103  		{
   104  			Name: "Last success outside grace period",
   105  			State: &healthCheckSource{
   106  				source: Source{
   107  					Checks: map[health.CheckType]CheckFunc{
   108  						checkType: nil,
   109  					},
   110  				},
   111  				gracePeriod: time.Minute,
   112  				checkStates: map[health.CheckType]*checkState{
   113  					checkType: {
   114  						lastResult: &health.HealthCheckResult{
   115  							Type:  checkType,
   116  							State: health.HealthStateError,
   117  						},
   118  						lastResultTime: time.Now(),
   119  						lastSuccess: &health.HealthCheckResult{
   120  							Type:  checkType,
   121  							State: health.HealthStateHealthy,
   122  						},
   123  						lastSuccessTime: time.Now().Add(-5 * time.Minute),
   124  					},
   125  				},
   126  			},
   127  			Expected: health.HealthStatus{
   128  				Checks: map[health.CheckType]health.HealthCheckResult{
   129  					checkType: {
   130  						Type:    checkType,
   131  						State:   health.HealthStateError,
   132  						Message: stringPtr("No successful checks during 1m0s grace period"),
   133  					},
   134  				},
   135  			},
   136  		},
   137  		{
   138  			Name: "No runs within grace period, last was success",
   139  			State: &healthCheckSource{
   140  				source: Source{
   141  					Checks: map[health.CheckType]CheckFunc{
   142  						checkType: nil,
   143  					},
   144  				},
   145  				gracePeriod: time.Minute,
   146  				checkStates: map[health.CheckType]*checkState{
   147  					checkType: {
   148  						lastResult: &health.HealthCheckResult{
   149  							Type:  checkType,
   150  							State: health.HealthStateHealthy,
   151  						},
   152  						lastResultTime: time.Now().Add(-5 * time.Minute),
   153  						lastSuccess: &health.HealthCheckResult{
   154  							Type:  checkType,
   155  							State: health.HealthStateHealthy,
   156  						},
   157  						lastSuccessTime: time.Now().Add(-5 * time.Minute),
   158  					},
   159  				},
   160  			},
   161  			Expected: health.HealthStatus{
   162  				Checks: map[health.CheckType]health.HealthCheckResult{
   163  					checkType: {
   164  						Type:    checkType,
   165  						State:   health.HealthStateRepairing,
   166  						Message: stringPtr("No completed checks during 1m0s grace period"),
   167  					},
   168  				},
   169  			},
   170  		},
   171  		{
   172  			Name: "No runs within grace period, last was error",
   173  			State: &healthCheckSource{
   174  				source: Source{
   175  					Checks: map[health.CheckType]CheckFunc{
   176  						checkType: nil,
   177  					},
   178  				},
   179  				gracePeriod: time.Minute,
   180  				checkStates: map[health.CheckType]*checkState{
   181  					checkType: {
   182  						lastResult: &health.HealthCheckResult{
   183  							Type:  checkType,
   184  							State: health.HealthStateError,
   185  						},
   186  						lastResultTime: time.Now().Add(-3 * time.Minute),
   187  						lastSuccess: &health.HealthCheckResult{
   188  							Type:  checkType,
   189  							State: health.HealthStateHealthy,
   190  						},
   191  						lastSuccessTime: time.Now().Add(-5 * time.Minute),
   192  					},
   193  				},
   194  			},
   195  			Expected: health.HealthStatus{
   196  				Checks: map[health.CheckType]health.HealthCheckResult{
   197  					checkType: {
   198  						Type:    checkType,
   199  						State:   health.HealthStateError,
   200  						Message: stringPtr("No completed checks during 1m0s grace period"),
   201  					},
   202  				},
   203  			},
   204  		},
   205  		{
   206  			Name: "No runs within grace period, last was error, with a message",
   207  			State: &healthCheckSource{
   208  				source: Source{
   209  					Checks: map[health.CheckType]CheckFunc{
   210  						checkType: nil,
   211  					},
   212  				},
   213  				gracePeriod: time.Minute,
   214  				checkStates: map[health.CheckType]*checkState{
   215  					checkType: {
   216  						lastResult: &health.HealthCheckResult{
   217  							Type:    checkType,
   218  							State:   health.HealthStateError,
   219  							Message: stringPtr("something went wrong"),
   220  						},
   221  						lastResultTime: time.Now().Add(-3 * time.Minute),
   222  						lastSuccess: &health.HealthCheckResult{
   223  							Type:  checkType,
   224  							State: health.HealthStateHealthy,
   225  						},
   226  						lastSuccessTime: time.Now().Add(-5 * time.Minute),
   227  					},
   228  				},
   229  			},
   230  			Expected: health.HealthStatus{
   231  				Checks: map[health.CheckType]health.HealthCheckResult{
   232  					checkType: {
   233  						Type:    checkType,
   234  						State:   health.HealthStateError,
   235  						Message: stringPtr("No completed checks during 1m0s grace period: something went wrong"),
   236  					},
   237  				},
   238  			},
   239  		},
   240  		{
   241  			Name: "Never started",
   242  			State: &healthCheckSource{
   243  				source: Source{
   244  					Checks: map[health.CheckType]CheckFunc{
   245  						checkType: nil,
   246  					},
   247  				},
   248  				gracePeriod: time.Minute,
   249  			},
   250  			Expected: health.HealthStatus{
   251  				Checks: map[health.CheckType]health.HealthCheckResult{
   252  					checkType: {
   253  						Type:    checkType,
   254  						State:   health.HealthStateRepairing,
   255  						Message: stringPtr("Check has not yet run"),
   256  					},
   257  				},
   258  			},
   259  		},
   260  		{
   261  			Name: "Two checks, one last result successful, one last success outside grace period",
   262  			State: &healthCheckSource{
   263  				source: Source{
   264  					Checks: map[health.CheckType]CheckFunc{
   265  						checkType:      nil,
   266  						otherCheckType: nil,
   267  					},
   268  				},
   269  				gracePeriod: time.Minute,
   270  				checkStates: map[health.CheckType]*checkState{
   271  					checkType: {
   272  						lastResult: &health.HealthCheckResult{
   273  							Type:  checkType,
   274  							State: health.HealthStateHealthy,
   275  						},
   276  						lastResultTime: time.Now(),
   277  						lastSuccess: &health.HealthCheckResult{
   278  							Type:  checkType,
   279  							State: health.HealthStateHealthy,
   280  						},
   281  						lastSuccessTime: time.Now(),
   282  					},
   283  					otherCheckType: {
   284  						lastResult: &health.HealthCheckResult{
   285  							Type:  otherCheckType,
   286  							State: health.HealthStateError,
   287  						},
   288  						lastResultTime: time.Now(),
   289  						lastSuccess: &health.HealthCheckResult{
   290  							Type:  otherCheckType,
   291  							State: health.HealthStateHealthy,
   292  						},
   293  						lastSuccessTime: time.Now().Add(-5 * time.Minute),
   294  					},
   295  				},
   296  			},
   297  			Expected: health.HealthStatus{
   298  				Checks: map[health.CheckType]health.HealthCheckResult{
   299  					checkType: {
   300  						Type:  checkType,
   301  						State: health.HealthStateHealthy,
   302  					},
   303  					otherCheckType: {
   304  						Type:    otherCheckType,
   305  						State:   health.HealthStateError,
   306  						Message: stringPtr("No successful checks during 1m0s grace period"),
   307  					},
   308  				},
   309  			},
   310  		},
   311  		{
   312  			Name: "Two checks, neither started",
   313  			State: &healthCheckSource{
   314  				source: Source{
   315  					Checks: map[health.CheckType]CheckFunc{
   316  						checkType:      nil,
   317  						otherCheckType: nil,
   318  					},
   319  				},
   320  				gracePeriod: time.Minute,
   321  			},
   322  			Expected: health.HealthStatus{
   323  				Checks: map[health.CheckType]health.HealthCheckResult{
   324  					checkType: {
   325  						Type:    checkType,
   326  						State:   health.HealthStateRepairing,
   327  						Message: stringPtr("Check has not yet run"),
   328  					},
   329  					otherCheckType: {
   330  						Type:    otherCheckType,
   331  						State:   health.HealthStateRepairing,
   332  						Message: stringPtr("Check has not yet run"),
   333  					},
   334  				},
   335  			},
   336  		},
   337  	} {
   338  		t.Run(test.Name, func(t *testing.T) {
   339  			result := test.State.HealthStatus(context.Background())
   340  			assert.Equal(t, test.Expected, result)
   341  		})
   342  	}
   343  }
   344  
   345  // Test does the following:
   346  //   * Starts health check source with 10ms retry interval and 100ms grace period
   347  //   * First health check returns healthy (t=10ms, counter=0)
   348  //   * Second health check returns unhealthy (t=20ms, counter=1)
   349  //   * When third health check is run, pause the health check routine and signal that health should be checked (t=30ms)
   350  //   * TEST: health status should be healthy, counter=0 (checking at roughly t=30ms, so there was a healthy check within grace period)
   351  //   * Wait until grace period has elapsed since healthy check was returned (roughly t=130ms)
   352  //   * Third check returns unhealthy (t=130ms, counter=2)
   353  //   * TEST: health status should be unhealthy due to no success within grace period (checking at roughly t=130ms, so there was a check that occurred within the grace period, but no successful check within the grace period)
   354  //   * Wait until grace period has elapsed (t=230ms)
   355  //   * TEST: health status should be unhealthy due to no check within grace period (checking at roughly t=230ms, so there is no check that occurred within the grace period)
   356  func TestFromHealthCheckSource(t *testing.T) {
   357  	// health check sends on this channel on its third run (after it has returned healthy and then error)
   358  	doneChan := make(chan struct{})
   359  	defer close(doneChan)
   360  
   361  	// health check waits on this channel on its third run (after it has sent on doneChan)
   362  	pauseChan := make(chan struct{})
   363  	defer close(pauseChan)
   364  
   365  	ctx, cancel := context.WithCancel(context.Background())
   366  	defer cancel()
   367  
   368  	gracePeriod := 100 * time.Millisecond
   369  	retryInterval := 10 * time.Millisecond
   370  	counter := 0
   371  
   372  	source := FromHealthCheckSource(ctx, gracePeriod, retryInterval, Source{
   373  		Checks: map[health.CheckType]CheckFunc{
   374  			checkType: func(ctx context.Context) (rVal *health.HealthCheckResult) {
   375  				defer func() {
   376  					counter++
   377  				}()
   378  
   379  				switch counter {
   380  				// return healthy state on first run
   381  				case 0:
   382  					return &health.HealthCheckResult{
   383  						Type:    checkType,
   384  						State:   health.HealthStateHealthy,
   385  						Message: stringPtr("Healthy state"),
   386  						Params: map[string]interface{}{
   387  							"counter": counter,
   388  						},
   389  					}
   390  				// return error state on second run
   391  				case 1:
   392  					return &health.HealthCheckResult{
   393  						Type:    checkType,
   394  						State:   health.HealthStateError,
   395  						Message: stringPtr("Error state"),
   396  						Params: map[string]interface{}{
   397  							"counter": counter,
   398  						},
   399  					}
   400  				// on third run, send on doneChan and read from pauseChan
   401  				case 2:
   402  					// signal that health can be checked
   403  					doneChan <- struct{}{}
   404  					// pause until health check has occurred
   405  					<-pauseChan
   406  					// return unhealthy
   407  					return &health.HealthCheckResult{
   408  						Type:    checkType,
   409  						State:   health.HealthStateError,
   410  						Message: stringPtr("Error state"),
   411  						Params: map[string]interface{}{
   412  							"counter": counter,
   413  						},
   414  					}
   415  				case 3:
   416  					// signal that health can be checked
   417  					doneChan <- struct{}{}
   418  					// pause (do not return until test has completed)
   419  					<-pauseChan
   420  				}
   421  				return nil
   422  			},
   423  		},
   424  	})
   425  
   426  	// wait until health check has returned healthy and then unhealthy
   427  	<-doneChan
   428  	status := source.HealthStatus(ctx)
   429  
   430  	// health check should be healthy: even though health source returned error state most recently, it returned
   431  	// healthy state within the grace period
   432  	assert.Equal(t, map[health.CheckType]health.HealthCheckResult{
   433  		checkType: {
   434  			Type:    checkType,
   435  			State:   health.HealthStateHealthy,
   436  			Message: stringPtr("Healthy state"),
   437  			Params: map[string]interface{}{
   438  				"counter": 0,
   439  			},
   440  		},
   441  	}, status.Checks)
   442  
   443  	// health has been checked: wait for grace period to pass and then unpause the health routine
   444  	time.Sleep(gracePeriod)
   445  	pauseChan <- struct{}{}
   446  	<-doneChan
   447  
   448  	// health check should be unhealthy: the most recent check ran within the grace period, but the last success was before grace period
   449  	status = source.HealthStatus(ctx)
   450  	assert.Equal(t, map[health.CheckType]health.HealthCheckResult{
   451  		checkType: {
   452  			Type:    checkType,
   453  			State:   health.HealthStateError,
   454  			Message: stringPtr("No successful checks during 100ms grace period: Error state"),
   455  			Params: map[string]interface{}{
   456  				"counter": 2,
   457  			},
   458  		},
   459  	}, status.Checks)
   460  
   461  	// wait for grace period
   462  	time.Sleep(gracePeriod)
   463  
   464  	// health check should be unhealthy: no check ran within grace period, and last known status was unhealthy
   465  	status = source.HealthStatus(ctx)
   466  	assert.Equal(t, map[health.CheckType]health.HealthCheckResult{
   467  		checkType: {
   468  			Type:    checkType,
   469  			State:   health.HealthStateError,
   470  			Message: stringPtr("No completed checks during 100ms grace period: Error state"),
   471  			Params: map[string]interface{}{
   472  				"counter": 2,
   473  			},
   474  		},
   475  	}, status.Checks)
   476  }