github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allochealth/tracker_test.go (about)

     1  package allochealth
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync/atomic"
     7  	"testing"
     8  	"time"
     9  
    10  	consulapi "github.com/hashicorp/consul/api"
    11  	"github.com/hashicorp/nomad/client/consul"
    12  	cstructs "github.com/hashicorp/nomad/client/structs"
    13  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    14  	"github.com/hashicorp/nomad/helper/testlog"
    15  	"github.com/hashicorp/nomad/nomad/mock"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/testutil"
    18  	"github.com/stretchr/testify/require"
    19  )
    20  
    21  func TestTracker_Checks_Healthy(t *testing.T) {
    22  	t.Parallel()
    23  
    24  	alloc := mock.Alloc()
    25  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
    26  	task := alloc.Job.TaskGroups[0].Tasks[0]
    27  
    28  	// Synthesize running alloc and tasks
    29  	alloc.ClientStatus = structs.AllocClientStatusRunning
    30  	alloc.TaskStates = map[string]*structs.TaskState{
    31  		task.Name: {
    32  			State:     structs.TaskStateRunning,
    33  			StartedAt: time.Now(),
    34  		},
    35  	}
    36  
    37  	// Make Consul response
    38  	check := &consulapi.AgentCheck{
    39  		Name:   task.Services[0].Checks[0].Name,
    40  		Status: consulapi.HealthPassing,
    41  	}
    42  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
    43  		task.Name: {
    44  			Services: map[string]*agentconsul.ServiceRegistration{
    45  				task.Services[0].Name: {
    46  					Service: &consulapi.AgentService{
    47  						ID:      "foo",
    48  						Service: task.Services[0].Name,
    49  					},
    50  					Checks: []*consulapi.AgentCheck{check},
    51  				},
    52  			},
    53  		},
    54  	}
    55  
    56  	logger := testlog.HCLogger(t)
    57  	b := cstructs.NewAllocBroadcaster(logger)
    58  	defer b.Close()
    59  
    60  	// Don't reply on the first call
    61  	var called uint64
    62  	consul := consul.NewMockConsulServiceClient(t, logger)
    63  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
    64  		if atomic.AddUint64(&called, 1) == 1 {
    65  			return nil, nil
    66  		}
    67  
    68  		reg := &agentconsul.AllocRegistration{
    69  			Tasks: taskRegs,
    70  		}
    71  
    72  		return reg, nil
    73  	}
    74  
    75  	ctx, cancelFn := context.WithCancel(context.Background())
    76  	defer cancelFn()
    77  
    78  	checkInterval := 10 * time.Millisecond
    79  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
    80  		time.Millisecond, true)
    81  	tracker.checkLookupInterval = checkInterval
    82  	tracker.Start()
    83  
    84  	select {
    85  	case <-time.After(4 * checkInterval):
    86  		require.Fail(t, "timed out while waiting for health")
    87  	case h := <-tracker.HealthyCh():
    88  		require.True(t, h)
    89  	}
    90  }
    91  
    92  func TestTracker_Checks_Unhealthy(t *testing.T) {
    93  	t.Parallel()
    94  
    95  	alloc := mock.Alloc()
    96  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
    97  	task := alloc.Job.TaskGroups[0].Tasks[0]
    98  
    99  	newCheck := task.Services[0].Checks[0].Copy()
   100  	newCheck.Name = "failing-check"
   101  	task.Services[0].Checks = append(task.Services[0].Checks, newCheck)
   102  
   103  	// Synthesize running alloc and tasks
   104  	alloc.ClientStatus = structs.AllocClientStatusRunning
   105  	alloc.TaskStates = map[string]*structs.TaskState{
   106  		task.Name: {
   107  			State:     structs.TaskStateRunning,
   108  			StartedAt: time.Now(),
   109  		},
   110  	}
   111  
   112  	// Make Consul response
   113  	checkHealthy := &consulapi.AgentCheck{
   114  		Name:   task.Services[0].Checks[0].Name,
   115  		Status: consulapi.HealthPassing,
   116  	}
   117  	checksUnhealthy := &consulapi.AgentCheck{
   118  		Name:   task.Services[0].Checks[1].Name,
   119  		Status: consulapi.HealthCritical,
   120  	}
   121  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
   122  		task.Name: {
   123  			Services: map[string]*agentconsul.ServiceRegistration{
   124  				task.Services[0].Name: {
   125  					Service: &consulapi.AgentService{
   126  						ID:      "foo",
   127  						Service: task.Services[0].Name,
   128  					},
   129  					Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy},
   130  				},
   131  			},
   132  		},
   133  	}
   134  
   135  	logger := testlog.HCLogger(t)
   136  	b := cstructs.NewAllocBroadcaster(logger)
   137  	defer b.Close()
   138  
   139  	// Don't reply on the first call
   140  	var called uint64
   141  	consul := consul.NewMockConsulServiceClient(t, logger)
   142  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   143  		if atomic.AddUint64(&called, 1) == 1 {
   144  			return nil, nil
   145  		}
   146  
   147  		reg := &agentconsul.AllocRegistration{
   148  			Tasks: taskRegs,
   149  		}
   150  
   151  		return reg, nil
   152  	}
   153  
   154  	ctx, cancelFn := context.WithCancel(context.Background())
   155  	defer cancelFn()
   156  
   157  	checkInterval := 10 * time.Millisecond
   158  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
   159  		time.Millisecond, true)
   160  	tracker.checkLookupInterval = checkInterval
   161  	tracker.Start()
   162  
   163  	testutil.WaitForResult(func() (bool, error) {
   164  		lookup := atomic.LoadUint64(&called)
   165  		return lookup < 4, fmt.Errorf("wait to get more task registration lookups: %v", lookup)
   166  	}, func(err error) {
   167  		require.NoError(t, err)
   168  	})
   169  
   170  	tracker.l.Lock()
   171  	require.False(t, tracker.checksHealthy)
   172  	tracker.l.Unlock()
   173  
   174  	select {
   175  	case v := <-tracker.HealthyCh():
   176  		require.Failf(t, "expected no health value", " got %v", v)
   177  	default:
   178  		// good
   179  	}
   180  }
   181  
   182  func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) {
   183  	t.Parallel()
   184  
   185  	alloc := mock.Alloc()
   186  	logger := testlog.HCLogger(t)
   187  
   188  	ctx, cancelFn := context.WithCancel(context.Background())
   189  	defer cancelFn()
   190  
   191  	tracker := NewTracker(ctx, logger, alloc, nil, nil,
   192  		time.Millisecond, true)
   193  
   194  	assertNoHealth := func() {
   195  		require.NoError(t, tracker.ctx.Err())
   196  		select {
   197  		case v := <-tracker.HealthyCh():
   198  			require.Failf(t, "unexpected healthy event", "got %v", v)
   199  		default:
   200  		}
   201  	}
   202  
   203  	// first set task health without checks
   204  	tracker.setTaskHealth(true, false)
   205  	assertNoHealth()
   206  
   207  	// now fail task health again before checks are successful
   208  	tracker.setTaskHealth(false, false)
   209  	assertNoHealth()
   210  
   211  	// now pass health checks - do not propagate health yet
   212  	tracker.setCheckHealth(true)
   213  	assertNoHealth()
   214  
   215  	// set tasks to healthy - don't propagate health yet, wait for the next check
   216  	tracker.setTaskHealth(true, false)
   217  	assertNoHealth()
   218  
   219  	// set checks to true, now propagate health status
   220  	tracker.setCheckHealth(true)
   221  
   222  	require.Error(t, tracker.ctx.Err())
   223  	select {
   224  	case v := <-tracker.HealthyCh():
   225  		require.True(t, v)
   226  	default:
   227  		require.Fail(t, "expected a health status")
   228  	}
   229  }
   230  
   231  // TestTracker_Checks_Healthy_Before_TaskHealth asserts that we mark an alloc
   232  // healthy, if the checks pass before task health pass
   233  func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
   234  	t.Parallel()
   235  
   236  	alloc := mock.Alloc()
   237  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   238  	task := alloc.Job.TaskGroups[0].Tasks[0]
   239  
   240  	// new task starting unhealthy, without services
   241  	task2 := task.Copy()
   242  	task2.Name = task2.Name + "2"
   243  	task2.Services = nil
   244  	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
   245  
   246  	// Synthesize running alloc and tasks
   247  	alloc.ClientStatus = structs.AllocClientStatusRunning
   248  	alloc.TaskStates = map[string]*structs.TaskState{
   249  		task.Name: {
   250  			State:     structs.TaskStateRunning,
   251  			StartedAt: time.Now(),
   252  		},
   253  		task2.Name: {
   254  			State: structs.TaskStatePending,
   255  		},
   256  	}
   257  
   258  	// Make Consul response
   259  	check := &consulapi.AgentCheck{
   260  		Name:   task.Services[0].Checks[0].Name,
   261  		Status: consulapi.HealthPassing,
   262  	}
   263  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
   264  		task.Name: {
   265  			Services: map[string]*agentconsul.ServiceRegistration{
   266  				task.Services[0].Name: {
   267  					Service: &consulapi.AgentService{
   268  						ID:      "foo",
   269  						Service: task.Services[0].Name,
   270  					},
   271  					Checks: []*consulapi.AgentCheck{check},
   272  				},
   273  			},
   274  		},
   275  	}
   276  
   277  	logger := testlog.HCLogger(t)
   278  	b := cstructs.NewAllocBroadcaster(logger)
   279  	defer b.Close()
   280  
   281  	// Don't reply on the first call
   282  	var called uint64
   283  	consul := consul.NewMockConsulServiceClient(t, logger)
   284  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   285  		if atomic.AddUint64(&called, 1) == 1 {
   286  			return nil, nil
   287  		}
   288  
   289  		reg := &agentconsul.AllocRegistration{
   290  			Tasks: taskRegs,
   291  		}
   292  
   293  		return reg, nil
   294  	}
   295  
   296  	ctx, cancelFn := context.WithCancel(context.Background())
   297  	defer cancelFn()
   298  
   299  	checkInterval := 10 * time.Millisecond
   300  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
   301  		time.Millisecond, true)
   302  	tracker.checkLookupInterval = checkInterval
   303  	tracker.Start()
   304  
   305  	// assert that we don't get marked healthy
   306  	select {
   307  	case <-time.After(4 * checkInterval):
   308  		// still unhealthy, good
   309  	case h := <-tracker.HealthyCh():
   310  		require.Fail(t, "unexpected health event", h)
   311  	}
   312  	require.False(t, tracker.tasksHealthy)
   313  	require.False(t, tracker.checksHealthy)
   314  
   315  	// now set task to healthy
   316  	runningAlloc := alloc.Copy()
   317  	runningAlloc.TaskStates = map[string]*structs.TaskState{
   318  		task.Name: {
   319  			State:     structs.TaskStateRunning,
   320  			StartedAt: time.Now(),
   321  		},
   322  		task2.Name: {
   323  			State:     structs.TaskStateRunning,
   324  			StartedAt: time.Now(),
   325  		},
   326  	}
   327  	err := b.Send(runningAlloc)
   328  	require.NoError(t, err)
   329  
   330  	// eventually, it is marked as healthy
   331  	select {
   332  	case <-time.After(4 * checkInterval):
   333  		require.Fail(t, "timed out while waiting for health")
   334  	case h := <-tracker.HealthyCh():
   335  		require.True(t, h)
   336  	}
   337  
   338  }