github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/health_hook_test.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"sync"
     5  	"testing"
     6  	"time"
     7  
     8  	consulapi "github.com/hashicorp/consul/api"
     9  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    10  	"github.com/hashicorp/nomad/client/consul"
    11  	cstructs "github.com/hashicorp/nomad/client/structs"
    12  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    13  	"github.com/hashicorp/nomad/helper/testlog"
    14  	"github.com/hashicorp/nomad/helper/uuid"
    15  	"github.com/hashicorp/nomad/nomad/mock"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/stretchr/testify/assert"
    18  	"github.com/stretchr/testify/require"
    19  )
    20  
    21  // statically assert health hook implements the expected interfaces
    22  var _ interfaces.RunnerPrerunHook = (*allocHealthWatcherHook)(nil)
    23  var _ interfaces.RunnerUpdateHook = (*allocHealthWatcherHook)(nil)
    24  var _ interfaces.RunnerPostrunHook = (*allocHealthWatcherHook)(nil)
    25  var _ interfaces.ShutdownHook = (*allocHealthWatcherHook)(nil)
    26  
    27  // allocHealth is emitted to a chan whenever SetHealth is called
    28  type allocHealth struct {
    29  	healthy    bool
    30  	taskEvents map[string]*structs.TaskEvent
    31  }
    32  
    33  // mockHealthSetter implements healthSetter that stores health internally
    34  type mockHealthSetter struct {
    35  	setCalls   int
    36  	clearCalls int
    37  	healthy    *bool
    38  	isDeploy   *bool
    39  	taskEvents map[string]*structs.TaskEvent
    40  	mu         sync.Mutex
    41  
    42  	healthCh chan allocHealth
    43  }
    44  
    45  // newMockHealthSetter returns a mock HealthSetter that emits all SetHealth
    46  // calls on a buffered chan. Callers who do need need notifications of health
    47  // changes may just create the struct directly.
    48  func newMockHealthSetter() *mockHealthSetter {
    49  	return &mockHealthSetter{
    50  		healthCh: make(chan allocHealth, 1),
    51  	}
    52  }
    53  
    54  func (m *mockHealthSetter) SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent) {
    55  	m.mu.Lock()
    56  	defer m.mu.Unlock()
    57  
    58  	m.setCalls++
    59  	m.healthy = &healthy
    60  	m.isDeploy = &isDeploy
    61  	m.taskEvents = taskEvents
    62  
    63  	if m.healthCh != nil {
    64  		m.healthCh <- allocHealth{healthy, taskEvents}
    65  	}
    66  }
    67  
    68  func (m *mockHealthSetter) ClearHealth() {
    69  	m.mu.Lock()
    70  	defer m.mu.Unlock()
    71  
    72  	m.clearCalls++
    73  	m.healthy = nil
    74  	m.isDeploy = nil
    75  	m.taskEvents = nil
    76  }
    77  
    78  func (m *mockHealthSetter) HasHealth() bool {
    79  	m.mu.Lock()
    80  	defer m.mu.Unlock()
    81  	return m.healthy != nil
    82  }
    83  
    84  // TestHealthHook_PrerunPostrun asserts a health hook does not error if it is
    85  // run and postrunned.
    86  func TestHealthHook_PrerunPostrun(t *testing.T) {
    87  	t.Parallel()
    88  	require := require.New(t)
    89  
    90  	logger := testlog.HCLogger(t)
    91  
    92  	b := cstructs.NewAllocBroadcaster(logger)
    93  	defer b.Close()
    94  
    95  	consul := consul.NewMockConsulServiceClient(t, logger)
    96  	hs := &mockHealthSetter{}
    97  
    98  	h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul)
    99  
   100  	// Assert we implemented the right interfaces
   101  	prerunh, ok := h.(interfaces.RunnerPrerunHook)
   102  	require.True(ok)
   103  	_, ok = h.(interfaces.RunnerUpdateHook)
   104  	require.True(ok)
   105  	postrunh, ok := h.(interfaces.RunnerPostrunHook)
   106  	require.True(ok)
   107  
   108  	// Prerun
   109  	require.NoError(prerunh.Prerun())
   110  
   111  	// Assert isDeploy is false (other tests peek at isDeploy to determine
   112  	// if an Update applied)
   113  	ahw := h.(*allocHealthWatcherHook)
   114  	ahw.hookLock.Lock()
   115  	assert.False(t, ahw.isDeploy)
   116  	ahw.hookLock.Unlock()
   117  
   118  	// Postrun
   119  	require.NoError(postrunh.Postrun())
   120  }
   121  
   122  // TestHealthHook_PrerunUpdatePostrun asserts Updates may be applied concurrently.
   123  func TestHealthHook_PrerunUpdatePostrun(t *testing.T) {
   124  	t.Parallel()
   125  	require := require.New(t)
   126  
   127  	alloc := mock.Alloc()
   128  
   129  	logger := testlog.HCLogger(t)
   130  	b := cstructs.NewAllocBroadcaster(logger)
   131  	defer b.Close()
   132  
   133  	consul := consul.NewMockConsulServiceClient(t, logger)
   134  	hs := &mockHealthSetter{}
   135  
   136  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
   137  
   138  	// Prerun
   139  	require.NoError(h.Prerun())
   140  
   141  	// Update multiple times in a goroutine to mimic Client behavior
   142  	// (Updates are concurrent with alloc runner but are applied serially).
   143  	errs := make(chan error, 2)
   144  	go func() {
   145  		defer close(errs)
   146  		for i := 0; i < cap(errs); i++ {
   147  			alloc.AllocModifyIndex++
   148  			errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc.Copy()})
   149  		}
   150  	}()
   151  
   152  	for err := range errs {
   153  		assert.NoError(t, err)
   154  	}
   155  
   156  	// Postrun
   157  	require.NoError(h.Postrun())
   158  }
   159  
   160  // TestHealthHook_UpdatePrerunPostrun asserts that a hook may have Update
   161  // called before Prerun.
   162  func TestHealthHook_UpdatePrerunPostrun(t *testing.T) {
   163  	t.Parallel()
   164  	require := require.New(t)
   165  
   166  	alloc := mock.Alloc()
   167  
   168  	logger := testlog.HCLogger(t)
   169  	b := cstructs.NewAllocBroadcaster(logger)
   170  	defer b.Close()
   171  
   172  	consul := consul.NewMockConsulServiceClient(t, logger)
   173  	hs := &mockHealthSetter{}
   174  
   175  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
   176  
   177  	// Set a DeploymentID to cause ClearHealth to be called
   178  	alloc.DeploymentID = uuid.Generate()
   179  
   180  	// Update in a goroutine to mimic Client behavior (Updates are
   181  	// concurrent with alloc runner).
   182  	errs := make(chan error, 1)
   183  	go func(alloc *structs.Allocation) {
   184  		errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc})
   185  		close(errs)
   186  	}(alloc.Copy())
   187  
   188  	for err := range errs {
   189  		assert.NoError(t, err)
   190  	}
   191  
   192  	// Prerun should be a noop
   193  	require.NoError(h.Prerun())
   194  
   195  	// Assert that the Update took affect by isDeploy being true
   196  	h.hookLock.Lock()
   197  	assert.True(t, h.isDeploy)
   198  	h.hookLock.Unlock()
   199  
   200  	// Postrun
   201  	require.NoError(h.Postrun())
   202  }
   203  
   204  // TestHealthHook_Postrun asserts that a hook may have only Postrun called.
   205  func TestHealthHook_Postrun(t *testing.T) {
   206  	t.Parallel()
   207  	require := require.New(t)
   208  
   209  	logger := testlog.HCLogger(t)
   210  	b := cstructs.NewAllocBroadcaster(logger)
   211  	defer b.Close()
   212  
   213  	consul := consul.NewMockConsulServiceClient(t, logger)
   214  	hs := &mockHealthSetter{}
   215  
   216  	h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
   217  
   218  	// Postrun
   219  	require.NoError(h.Postrun())
   220  }
   221  
   222  // TestHealthHook_SetHealth_healthy asserts SetHealth is called when health status is
   223  // set. Uses task state and health checks.
   224  func TestHealthHook_SetHealth_healthy(t *testing.T) {
   225  	t.Parallel()
   226  	require := require.New(t)
   227  
   228  	alloc := mock.Alloc()
   229  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   230  	task := alloc.Job.TaskGroups[0].Tasks[0]
   231  
   232  	// Synthesize running alloc and tasks
   233  	alloc.ClientStatus = structs.AllocClientStatusRunning
   234  	alloc.TaskStates = map[string]*structs.TaskState{
   235  		task.Name: {
   236  			State:     structs.TaskStateRunning,
   237  			StartedAt: time.Now(),
   238  		},
   239  	}
   240  
   241  	// Make Consul response
   242  	check := &consulapi.AgentCheck{
   243  		Name:   task.Services[0].Checks[0].Name,
   244  		Status: consulapi.HealthPassing,
   245  	}
   246  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
   247  		task.Name: {
   248  			Services: map[string]*agentconsul.ServiceRegistration{
   249  				task.Services[0].Name: {
   250  					Service: &consulapi.AgentService{
   251  						ID:      "foo",
   252  						Service: task.Services[0].Name,
   253  					},
   254  					Checks: []*consulapi.AgentCheck{check},
   255  				},
   256  			},
   257  		},
   258  	}
   259  
   260  	logger := testlog.HCLogger(t)
   261  	b := cstructs.NewAllocBroadcaster(logger)
   262  	defer b.Close()
   263  
   264  	// Don't reply on the first call
   265  	called := false
   266  	consul := consul.NewMockConsulServiceClient(t, logger)
   267  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   268  		if !called {
   269  			called = true
   270  			return nil, nil
   271  		}
   272  
   273  		reg := &agentconsul.AllocRegistration{
   274  			Tasks: taskRegs,
   275  		}
   276  
   277  		return reg, nil
   278  	}
   279  
   280  	hs := newMockHealthSetter()
   281  
   282  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
   283  
   284  	// Prerun
   285  	require.NoError(h.Prerun())
   286  
   287  	// Wait for health to be set (healthy)
   288  	select {
   289  	case <-time.After(5 * time.Second):
   290  		t.Fatalf("timeout waiting for health to be set")
   291  	case health := <-hs.healthCh:
   292  		require.True(health.healthy)
   293  
   294  		// Healthy allocs shouldn't emit task events
   295  		ev := health.taskEvents[task.Name]
   296  		require.Nilf(ev, "%#v", health.taskEvents)
   297  	}
   298  
   299  	// Postrun
   300  	require.NoError(h.Postrun())
   301  }
   302  
   303  // TestHealthHook_SetHealth_unhealthy asserts SetHealth notices unhealthy allocs
   304  func TestHealthHook_SetHealth_unhealthy(t *testing.T) {
   305  	t.Parallel()
   306  	require := require.New(t)
   307  
   308  	alloc := mock.Alloc()
   309  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   310  	task := alloc.Job.TaskGroups[0].Tasks[0]
   311  
   312  	newCheck := task.Services[0].Checks[0].Copy()
   313  	newCheck.Name = "failing-check"
   314  	task.Services[0].Checks = append(task.Services[0].Checks, newCheck)
   315  
   316  	// Synthesize running alloc and tasks
   317  	alloc.ClientStatus = structs.AllocClientStatusRunning
   318  	alloc.TaskStates = map[string]*structs.TaskState{
   319  		task.Name: {
   320  			State:     structs.TaskStateRunning,
   321  			StartedAt: time.Now(),
   322  		},
   323  	}
   324  
   325  	// Make Consul response
   326  	checkHealthy := &consulapi.AgentCheck{
   327  		Name:   task.Services[0].Checks[0].Name,
   328  		Status: consulapi.HealthPassing,
   329  	}
   330  	checksUnhealthy := &consulapi.AgentCheck{
   331  		Name:   task.Services[0].Checks[1].Name,
   332  		Status: consulapi.HealthCritical,
   333  	}
   334  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
   335  		task.Name: {
   336  			Services: map[string]*agentconsul.ServiceRegistration{
   337  				task.Services[0].Name: {
   338  					Service: &consulapi.AgentService{
   339  						ID:      "foo",
   340  						Service: task.Services[0].Name,
   341  					},
   342  					Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy},
   343  				},
   344  			},
   345  		},
   346  	}
   347  
   348  	logger := testlog.HCLogger(t)
   349  	b := cstructs.NewAllocBroadcaster(logger)
   350  	defer b.Close()
   351  
   352  	// Don't reply on the first call
   353  	called := false
   354  	consul := consul.NewMockConsulServiceClient(t, logger)
   355  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   356  		if !called {
   357  			called = true
   358  			return nil, nil
   359  		}
   360  
   361  		reg := &agentconsul.AllocRegistration{
   362  			Tasks: taskRegs,
   363  		}
   364  
   365  		return reg, nil
   366  	}
   367  
   368  	hs := newMockHealthSetter()
   369  
   370  	h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook)
   371  
   372  	// Prerun
   373  	require.NoError(h.Prerun())
   374  
   375  	// Wait to ensure we don't get a healthy status
   376  	select {
   377  	case <-time.After(2 * time.Second):
   378  		// great no healthy status
   379  	case health := <-hs.healthCh:
   380  		require.Fail("expected no health event", "got %v", health)
   381  	}
   382  
   383  	// Postrun
   384  	require.NoError(h.Postrun())
   385  }
   386  
   387  // TestHealthHook_SystemNoop asserts that system jobs return the noop tracker.
   388  func TestHealthHook_SystemNoop(t *testing.T) {
   389  	t.Parallel()
   390  
   391  	h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.SystemAlloc(), nil, nil, nil)
   392  
   393  	// Assert that it's the noop impl
   394  	_, ok := h.(noopAllocHealthWatcherHook)
   395  	require.True(t, ok)
   396  
   397  	// Assert the noop impl does not implement any hooks
   398  	_, ok = h.(interfaces.RunnerPrerunHook)
   399  	require.False(t, ok)
   400  	_, ok = h.(interfaces.RunnerUpdateHook)
   401  	require.False(t, ok)
   402  	_, ok = h.(interfaces.RunnerPostrunHook)
   403  	require.False(t, ok)
   404  	_, ok = h.(interfaces.ShutdownHook)
   405  	require.False(t, ok)
   406  }
   407  
   408  // TestHealthHook_BatchNoop asserts that batch jobs return the noop tracker.
   409  func TestHealthHook_BatchNoop(t *testing.T) {
   410  	t.Parallel()
   411  
   412  	h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.BatchAlloc(), nil, nil, nil)
   413  
   414  	// Assert that it's the noop impl
   415  	_, ok := h.(noopAllocHealthWatcherHook)
   416  	require.True(t, ok)
   417  }