github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/health_hook_test.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"sync"
     5  	"testing"
     6  	"time"
     7  
     8  	consulapi "github.com/hashicorp/consul/api"
     9  	"github.com/hashicorp/nomad/ci"
    10  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    11  	"github.com/hashicorp/nomad/client/serviceregistration"
    12  	regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
    13  	cstructs "github.com/hashicorp/nomad/client/structs"
    14  	"github.com/hashicorp/nomad/helper/testlog"
    15  	"github.com/hashicorp/nomad/helper/uuid"
    16  	"github.com/hashicorp/nomad/nomad/mock"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  // statically assert health hook implements the expected interfaces
    23  var _ interfaces.RunnerPrerunHook = (*allocHealthWatcherHook)(nil)
    24  var _ interfaces.RunnerUpdateHook = (*allocHealthWatcherHook)(nil)
    25  var _ interfaces.RunnerPostrunHook = (*allocHealthWatcherHook)(nil)
    26  var _ interfaces.ShutdownHook = (*allocHealthWatcherHook)(nil)
    27  
    28  // allocHealth is emitted to a chan whenever SetHealth is called
    29  type allocHealth struct {
    30  	healthy    bool
    31  	taskEvents map[string]*structs.TaskEvent
    32  }
    33  
    34  // mockHealthSetter implements healthSetter that stores health internally
    35  type mockHealthSetter struct {
    36  	setCalls   int
    37  	clearCalls int
    38  	healthy    *bool
    39  	isDeploy   *bool
    40  	taskEvents map[string]*structs.TaskEvent
    41  	mu         sync.Mutex
    42  
    43  	healthCh chan allocHealth
    44  }
    45  
    46  // newMockHealthSetter returns a mock HealthSetter that emits all SetHealth
    47  // calls on a buffered chan. Callers who do need need notifications of health
    48  // changes may just create the struct directly.
    49  func newMockHealthSetter() *mockHealthSetter {
    50  	return &mockHealthSetter{
    51  		healthCh: make(chan allocHealth, 1),
    52  	}
    53  }
    54  
    55  func (m *mockHealthSetter) SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent) {
    56  	m.mu.Lock()
    57  	defer m.mu.Unlock()
    58  
    59  	m.setCalls++
    60  	m.healthy = &healthy
    61  	m.isDeploy = &isDeploy
    62  	m.taskEvents = taskEvents
    63  
    64  	if m.healthCh != nil {
    65  		m.healthCh <- allocHealth{healthy, taskEvents}
    66  	}
    67  }
    68  
    69  func (m *mockHealthSetter) ClearHealth() {
    70  	m.mu.Lock()
    71  	defer m.mu.Unlock()
    72  
    73  	m.clearCalls++
    74  	m.healthy = nil
    75  	m.isDeploy = nil
    76  	m.taskEvents = nil
    77  }
    78  
    79  func (m *mockHealthSetter) HasHealth() bool {
    80  	m.mu.Lock()
    81  	defer m.mu.Unlock()
    82  	return m.healthy != nil
    83  }
    84  
    85  // TestHealthHook_PrerunPostrun asserts a health hook does not error if it is
    86  // run and postrunned.
    87  func TestHealthHook_PrerunPostrun(t *testing.T) {
    88  	ci.Parallel(t)
    89  	require := require.New(t)
    90  
    91  	logger := testlog.HCLogger(t)
    92  
    93  	b := cstructs.NewAllocBroadcaster(logger)
    94  	defer b.Close()
    95  
    96  	consul := regMock.NewServiceRegistrationHandler(logger)
    97  	hs := &mockHealthSetter{}
    98  
    99  	checks := new(mock.CheckShim)
   100  	h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul, checks)
   101  
   102  	// Assert we implemented the right interfaces
   103  	prerunh, ok := h.(interfaces.RunnerPrerunHook)
   104  	require.True(ok)
   105  	_, ok = h.(interfaces.RunnerUpdateHook)
   106  	require.True(ok)
   107  	postrunh, ok := h.(interfaces.RunnerPostrunHook)
   108  	require.True(ok)
   109  
   110  	// Prerun
   111  	require.NoError(prerunh.Prerun())
   112  
   113  	// Assert isDeploy is false (other tests peek at isDeploy to determine
   114  	// if an Update applied)
   115  	ahw := h.(*allocHealthWatcherHook)
   116  	ahw.hookLock.Lock()
   117  	assert.False(t, ahw.isDeploy)
   118  	ahw.hookLock.Unlock()
   119  
   120  	// Postrun
   121  	require.NoError(postrunh.Postrun())
   122  }
   123  
   124  // TestHealthHook_PrerunUpdatePostrun asserts Updates may be applied concurrently.
   125  func TestHealthHook_PrerunUpdatePostrun(t *testing.T) {
   126  	ci.Parallel(t)
   127  	require := require.New(t)
   128  
   129  	alloc := mock.Alloc()
   130  
   131  	logger := testlog.HCLogger(t)
   132  	b := cstructs.NewAllocBroadcaster(logger)
   133  	defer b.Close()
   134  
   135  	consul := regMock.NewServiceRegistrationHandler(logger)
   136  	hs := &mockHealthSetter{}
   137  
   138  	checks := new(mock.CheckShim)
   139  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
   140  
   141  	// Prerun
   142  	require.NoError(h.Prerun())
   143  
   144  	// Update multiple times in a goroutine to mimic Client behavior
   145  	// (Updates are concurrent with alloc runner but are applied serially).
   146  	errs := make(chan error, 2)
   147  	go func() {
   148  		defer close(errs)
   149  		for i := 0; i < cap(errs); i++ {
   150  			alloc.AllocModifyIndex++
   151  			errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc.Copy()})
   152  		}
   153  	}()
   154  
   155  	for err := range errs {
   156  		assert.NoError(t, err)
   157  	}
   158  
   159  	// Postrun
   160  	require.NoError(h.Postrun())
   161  }
   162  
   163  // TestHealthHook_UpdatePrerunPostrun asserts that a hook may have Update
   164  // called before Prerun.
   165  func TestHealthHook_UpdatePrerunPostrun(t *testing.T) {
   166  	ci.Parallel(t)
   167  	require := require.New(t)
   168  
   169  	alloc := mock.Alloc()
   170  
   171  	logger := testlog.HCLogger(t)
   172  	b := cstructs.NewAllocBroadcaster(logger)
   173  	defer b.Close()
   174  
   175  	consul := regMock.NewServiceRegistrationHandler(logger)
   176  	hs := &mockHealthSetter{}
   177  
   178  	checks := new(mock.CheckShim)
   179  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
   180  
   181  	// Set a DeploymentID to cause ClearHealth to be called
   182  	alloc.DeploymentID = uuid.Generate()
   183  
   184  	// Update in a goroutine to mimic Client behavior (Updates are
   185  	// concurrent with alloc runner).
   186  	errs := make(chan error, 1)
   187  	go func(alloc *structs.Allocation) {
   188  		errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc})
   189  		close(errs)
   190  	}(alloc.Copy())
   191  
   192  	for err := range errs {
   193  		assert.NoError(t, err)
   194  	}
   195  
   196  	// Prerun should be a noop
   197  	require.NoError(h.Prerun())
   198  
   199  	// Assert that the Update took affect by isDeploy being true
   200  	h.hookLock.Lock()
   201  	assert.True(t, h.isDeploy)
   202  	h.hookLock.Unlock()
   203  
   204  	// Postrun
   205  	require.NoError(h.Postrun())
   206  }
   207  
   208  // TestHealthHook_Postrun asserts that a hook may have only Postrun called.
   209  func TestHealthHook_Postrun(t *testing.T) {
   210  	ci.Parallel(t)
   211  	require := require.New(t)
   212  
   213  	logger := testlog.HCLogger(t)
   214  	b := cstructs.NewAllocBroadcaster(logger)
   215  	defer b.Close()
   216  
   217  	consul := regMock.NewServiceRegistrationHandler(logger)
   218  	hs := &mockHealthSetter{}
   219  
   220  	checks := new(mock.CheckShim)
   221  	h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
   222  
   223  	// Postrun
   224  	require.NoError(h.Postrun())
   225  }
   226  
   227  // TestHealthHook_SetHealth_healthy asserts SetHealth is called when health status is
   228  // set. Uses task state and health checks.
   229  func TestHealthHook_SetHealth_healthy(t *testing.T) {
   230  	ci.Parallel(t)
   231  	require := require.New(t)
   232  
   233  	alloc := mock.Alloc()
   234  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   235  	task := alloc.Job.TaskGroups[0].Tasks[0]
   236  
   237  	// Synthesize running alloc and tasks
   238  	alloc.ClientStatus = structs.AllocClientStatusRunning
   239  	alloc.TaskStates = map[string]*structs.TaskState{
   240  		task.Name: {
   241  			State:     structs.TaskStateRunning,
   242  			StartedAt: time.Now(),
   243  		},
   244  	}
   245  
   246  	// Make Consul response
   247  	check := &consulapi.AgentCheck{
   248  		Name:   task.Services[0].Checks[0].Name,
   249  		Status: consulapi.HealthPassing,
   250  	}
   251  	taskRegs := map[string]*serviceregistration.ServiceRegistrations{
   252  		task.Name: {
   253  			Services: map[string]*serviceregistration.ServiceRegistration{
   254  				task.Services[0].Name: {
   255  					Service: &consulapi.AgentService{
   256  						ID:      "foo",
   257  						Service: task.Services[0].Name,
   258  					},
   259  					Checks: []*consulapi.AgentCheck{check},
   260  				},
   261  			},
   262  		},
   263  	}
   264  
   265  	logger := testlog.HCLogger(t)
   266  	b := cstructs.NewAllocBroadcaster(logger)
   267  	defer b.Close()
   268  
   269  	// Don't reply on the first call
   270  	called := false
   271  	consul := regMock.NewServiceRegistrationHandler(logger)
   272  	consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
   273  		if !called {
   274  			called = true
   275  			return nil, nil
   276  		}
   277  
   278  		reg := &serviceregistration.AllocRegistration{
   279  			Tasks: taskRegs,
   280  		}
   281  
   282  		return reg, nil
   283  	}
   284  
   285  	hs := newMockHealthSetter()
   286  
   287  	checks := new(mock.CheckShim)
   288  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
   289  
   290  	// Prerun
   291  	require.NoError(h.Prerun())
   292  
   293  	// Wait for health to be set (healthy)
   294  	select {
   295  	case <-time.After(5 * time.Second):
   296  		t.Fatalf("timeout waiting for health to be set")
   297  	case health := <-hs.healthCh:
   298  		require.True(health.healthy)
   299  
   300  		// Healthy allocs shouldn't emit task events
   301  		ev := health.taskEvents[task.Name]
   302  		require.Nilf(ev, "%#v", health.taskEvents)
   303  	}
   304  
   305  	// Postrun
   306  	require.NoError(h.Postrun())
   307  }
   308  
   309  // TestHealthHook_SetHealth_unhealthy asserts SetHealth notices unhealthy allocs
   310  func TestHealthHook_SetHealth_unhealthy(t *testing.T) {
   311  	ci.Parallel(t)
   312  	require := require.New(t)
   313  
   314  	alloc := mock.Alloc()
   315  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   316  	task := alloc.Job.TaskGroups[0].Tasks[0]
   317  
   318  	newCheck := task.Services[0].Checks[0].Copy()
   319  	newCheck.Name = "failing-check"
   320  	task.Services[0].Checks = append(task.Services[0].Checks, newCheck)
   321  
   322  	// Synthesize running alloc and tasks
   323  	alloc.ClientStatus = structs.AllocClientStatusRunning
   324  	alloc.TaskStates = map[string]*structs.TaskState{
   325  		task.Name: {
   326  			State:     structs.TaskStateRunning,
   327  			StartedAt: time.Now(),
   328  		},
   329  	}
   330  
   331  	// Make Consul response
   332  	checkHealthy := &consulapi.AgentCheck{
   333  		Name:   task.Services[0].Checks[0].Name,
   334  		Status: consulapi.HealthPassing,
   335  	}
   336  	checksUnhealthy := &consulapi.AgentCheck{
   337  		Name:   task.Services[0].Checks[1].Name,
   338  		Status: consulapi.HealthCritical,
   339  	}
   340  	taskRegs := map[string]*serviceregistration.ServiceRegistrations{
   341  		task.Name: {
   342  			Services: map[string]*serviceregistration.ServiceRegistration{
   343  				task.Services[0].Name: {
   344  					Service: &consulapi.AgentService{
   345  						ID:      "foo",
   346  						Service: task.Services[0].Name,
   347  					},
   348  					Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy},
   349  				},
   350  			},
   351  		},
   352  	}
   353  
   354  	logger := testlog.HCLogger(t)
   355  	b := cstructs.NewAllocBroadcaster(logger)
   356  	defer b.Close()
   357  
   358  	// Don't reply on the first call
   359  	called := false
   360  	consul := regMock.NewServiceRegistrationHandler(logger)
   361  	consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
   362  		if !called {
   363  			called = true
   364  			return nil, nil
   365  		}
   366  
   367  		reg := &serviceregistration.AllocRegistration{
   368  			Tasks: taskRegs,
   369  		}
   370  
   371  		return reg, nil
   372  	}
   373  
   374  	hs := newMockHealthSetter()
   375  
   376  	checks := new(mock.CheckShim)
   377  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook)
   378  
   379  	// Prerun
   380  	require.NoError(h.Prerun())
   381  
   382  	// Wait to ensure we don't get a healthy status
   383  	select {
   384  	case <-time.After(2 * time.Second):
   385  		// great no healthy status
   386  	case health := <-hs.healthCh:
   387  		require.Fail("expected no health event", "got %v", health)
   388  	}
   389  
   390  	// Postrun
   391  	require.NoError(h.Postrun())
   392  }
   393  
   394  // TestHealthHook_SystemNoop asserts that system jobs return the noop tracker.
   395  func TestHealthHook_SystemNoop(t *testing.T) {
   396  	ci.Parallel(t)
   397  
   398  	h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.SystemAlloc(), nil, nil, nil, nil)
   399  
   400  	// Assert that it's the noop impl
   401  	_, ok := h.(noopAllocHealthWatcherHook)
   402  	require.True(t, ok)
   403  
   404  	// Assert the noop impl does not implement any hooks
   405  	_, ok = h.(interfaces.RunnerPrerunHook)
   406  	require.False(t, ok)
   407  	_, ok = h.(interfaces.RunnerUpdateHook)
   408  	require.False(t, ok)
   409  	_, ok = h.(interfaces.RunnerPostrunHook)
   410  	require.False(t, ok)
   411  	_, ok = h.(interfaces.ShutdownHook)
   412  	require.False(t, ok)
   413  }
   414  
   415  // TestHealthHook_BatchNoop asserts that batch jobs return the noop tracker.
   416  func TestHealthHook_BatchNoop(t *testing.T) {
   417  	ci.Parallel(t)
   418  
   419  	h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.BatchAlloc(), nil, nil, nil, nil)
   420  
   421  	// Assert that it's the noop impl
   422  	_, ok := h.(noopAllocHealthWatcherHook)
   423  	require.True(t, ok)
   424  }