github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allochealth/tracker_test.go (about)

     1  package allochealth
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync/atomic"
     7  	"testing"
     8  	"time"
     9  
    10  	consulapi "github.com/hashicorp/consul/api"
    11  	"github.com/hashicorp/nomad/client/consul"
    12  	cstructs "github.com/hashicorp/nomad/client/structs"
    13  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    14  	"github.com/hashicorp/nomad/helper/testlog"
    15  	"github.com/hashicorp/nomad/nomad/mock"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/testutil"
    18  	"github.com/stretchr/testify/require"
    19  )
    20  
    21  func TestTracker_Checks_Healthy(t *testing.T) {
    22  	t.Parallel()
    23  
    24  	alloc := mock.Alloc()
    25  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
    26  	task := alloc.Job.TaskGroups[0].Tasks[0]
    27  
    28  	// Synthesize running alloc and tasks
    29  	alloc.ClientStatus = structs.AllocClientStatusRunning
    30  	alloc.TaskStates = map[string]*structs.TaskState{
    31  		task.Name: {
    32  			State:     structs.TaskStateRunning,
    33  			StartedAt: time.Now(),
    34  		},
    35  	}
    36  
    37  	// Make Consul response
    38  	check := &consulapi.AgentCheck{
    39  		Name:   task.Services[0].Checks[0].Name,
    40  		Status: consulapi.HealthPassing,
    41  	}
    42  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
    43  		task.Name: {
    44  			Services: map[string]*agentconsul.ServiceRegistration{
    45  				task.Services[0].Name: {
    46  					Service: &consulapi.AgentService{
    47  						ID:      "foo",
    48  						Service: task.Services[0].Name,
    49  					},
    50  					Checks: []*consulapi.AgentCheck{check},
    51  				},
    52  			},
    53  		},
    54  	}
    55  
    56  	logger := testlog.HCLogger(t)
    57  	b := cstructs.NewAllocBroadcaster(logger)
    58  	defer b.Close()
    59  
    60  	// Don't reply on the first call
    61  	var called uint64
    62  	consul := consul.NewMockConsulServiceClient(t, logger)
    63  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
    64  		if atomic.AddUint64(&called, 1) == 1 {
    65  			return nil, nil
    66  		}
    67  
    68  		reg := &agentconsul.AllocRegistration{
    69  			Tasks: taskRegs,
    70  		}
    71  
    72  		return reg, nil
    73  	}
    74  
    75  	ctx, cancelFn := context.WithCancel(context.Background())
    76  	defer cancelFn()
    77  
    78  	checkInterval := 10 * time.Millisecond
    79  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
    80  		time.Millisecond, true)
    81  	tracker.checkLookupInterval = checkInterval
    82  	tracker.Start()
    83  
    84  	select {
    85  	case <-time.After(4 * checkInterval):
    86  		require.Fail(t, "timed out while waiting for health")
    87  	case h := <-tracker.HealthyCh():
    88  		require.True(t, h)
    89  	}
    90  }
    91  
    92  func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
    93  	t.Parallel()
    94  
    95  	alloc := mock.LifecycleAllocWithPoststopDeploy()
    96  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
    97  
    98  	// Synthesize running alloc and tasks
    99  	alloc.ClientStatus = structs.AllocClientStatusRunning
   100  	alloc.TaskStates = map[string]*structs.TaskState{
   101  		"web": {
   102  			State:     structs.TaskStateRunning,
   103  			StartedAt: time.Now(),
   104  		},
   105  		"post": {
   106  			State: structs.TaskStatePending,
   107  		},
   108  	}
   109  
   110  	logger := testlog.HCLogger(t)
   111  	b := cstructs.NewAllocBroadcaster(logger)
   112  	defer b.Close()
   113  
   114  	consul := consul.NewMockConsulServiceClient(t, logger)
   115  	ctx, cancelFn := context.WithCancel(context.Background())
   116  	defer cancelFn()
   117  
   118  	checkInterval := 10 * time.Millisecond
   119  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
   120  		time.Millisecond, true)
   121  	tracker.checkLookupInterval = checkInterval
   122  	tracker.Start()
   123  
   124  	select {
   125  	case <-time.After(4 * checkInterval):
   126  		require.Fail(t, "timed out while waiting for health")
   127  	case h := <-tracker.HealthyCh():
   128  		require.True(t, h)
   129  	}
   130  }
   131  
   132  func TestTracker_Checks_Unhealthy(t *testing.T) {
   133  	t.Parallel()
   134  
   135  	alloc := mock.Alloc()
   136  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   137  	task := alloc.Job.TaskGroups[0].Tasks[0]
   138  
   139  	newCheck := task.Services[0].Checks[0].Copy()
   140  	newCheck.Name = "failing-check"
   141  	task.Services[0].Checks = append(task.Services[0].Checks, newCheck)
   142  
   143  	// Synthesize running alloc and tasks
   144  	alloc.ClientStatus = structs.AllocClientStatusRunning
   145  	alloc.TaskStates = map[string]*structs.TaskState{
   146  		task.Name: {
   147  			State:     structs.TaskStateRunning,
   148  			StartedAt: time.Now(),
   149  		},
   150  	}
   151  
   152  	// Make Consul response
   153  	checkHealthy := &consulapi.AgentCheck{
   154  		Name:   task.Services[0].Checks[0].Name,
   155  		Status: consulapi.HealthPassing,
   156  	}
   157  	checksUnhealthy := &consulapi.AgentCheck{
   158  		Name:   task.Services[0].Checks[1].Name,
   159  		Status: consulapi.HealthCritical,
   160  	}
   161  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
   162  		task.Name: {
   163  			Services: map[string]*agentconsul.ServiceRegistration{
   164  				task.Services[0].Name: {
   165  					Service: &consulapi.AgentService{
   166  						ID:      "foo",
   167  						Service: task.Services[0].Name,
   168  					},
   169  					Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy},
   170  				},
   171  			},
   172  		},
   173  	}
   174  
   175  	logger := testlog.HCLogger(t)
   176  	b := cstructs.NewAllocBroadcaster(logger)
   177  	defer b.Close()
   178  
   179  	// Don't reply on the first call
   180  	var called uint64
   181  	consul := consul.NewMockConsulServiceClient(t, logger)
   182  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   183  		if atomic.AddUint64(&called, 1) == 1 {
   184  			return nil, nil
   185  		}
   186  
   187  		reg := &agentconsul.AllocRegistration{
   188  			Tasks: taskRegs,
   189  		}
   190  
   191  		return reg, nil
   192  	}
   193  
   194  	ctx, cancelFn := context.WithCancel(context.Background())
   195  	defer cancelFn()
   196  
   197  	checkInterval := 10 * time.Millisecond
   198  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
   199  		time.Millisecond, true)
   200  	tracker.checkLookupInterval = checkInterval
   201  	tracker.Start()
   202  
   203  	testutil.WaitForResult(func() (bool, error) {
   204  		lookup := atomic.LoadUint64(&called)
   205  		return lookup < 4, fmt.Errorf("wait to get more task registration lookups: %v", lookup)
   206  	}, func(err error) {
   207  		require.NoError(t, err)
   208  	})
   209  
   210  	tracker.l.Lock()
   211  	require.False(t, tracker.checksHealthy)
   212  	tracker.l.Unlock()
   213  
   214  	select {
   215  	case v := <-tracker.HealthyCh():
   216  		require.Failf(t, "expected no health value", " got %v", v)
   217  	default:
   218  		// good
   219  	}
   220  }
   221  
   222  func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) {
   223  	t.Parallel()
   224  
   225  	alloc := mock.Alloc()
   226  	logger := testlog.HCLogger(t)
   227  
   228  	ctx, cancelFn := context.WithCancel(context.Background())
   229  	defer cancelFn()
   230  
   231  	tracker := NewTracker(ctx, logger, alloc, nil, nil,
   232  		time.Millisecond, true)
   233  
   234  	assertNoHealth := func() {
   235  		require.NoError(t, tracker.ctx.Err())
   236  		select {
   237  		case v := <-tracker.HealthyCh():
   238  			require.Failf(t, "unexpected healthy event", "got %v", v)
   239  		default:
   240  		}
   241  	}
   242  
   243  	// first set task health without checks
   244  	tracker.setTaskHealth(true, false)
   245  	assertNoHealth()
   246  
   247  	// now fail task health again before checks are successful
   248  	tracker.setTaskHealth(false, false)
   249  	assertNoHealth()
   250  
   251  	// now pass health checks - do not propagate health yet
   252  	tracker.setCheckHealth(true)
   253  	assertNoHealth()
   254  
   255  	// set tasks to healthy - don't propagate health yet, wait for the next check
   256  	tracker.setTaskHealth(true, false)
   257  	assertNoHealth()
   258  
   259  	// set checks to true, now propagate health status
   260  	tracker.setCheckHealth(true)
   261  
   262  	require.Error(t, tracker.ctx.Err())
   263  	select {
   264  	case v := <-tracker.HealthyCh():
   265  		require.True(t, v)
   266  	default:
   267  		require.Fail(t, "expected a health status")
   268  	}
   269  }
   270  
   271  // TestTracker_Checks_Healthy_Before_TaskHealth asserts that we mark an alloc
   272  // healthy, if the checks pass before task health pass
   273  func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
   274  	t.Parallel()
   275  
   276  	alloc := mock.Alloc()
   277  	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   278  	task := alloc.Job.TaskGroups[0].Tasks[0]
   279  
   280  	// new task starting unhealthy, without services
   281  	task2 := task.Copy()
   282  	task2.Name = task2.Name + "2"
   283  	task2.Services = nil
   284  	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)
   285  
   286  	// Synthesize running alloc and tasks
   287  	alloc.ClientStatus = structs.AllocClientStatusRunning
   288  	alloc.TaskStates = map[string]*structs.TaskState{
   289  		task.Name: {
   290  			State:     structs.TaskStateRunning,
   291  			StartedAt: time.Now(),
   292  		},
   293  		task2.Name: {
   294  			State: structs.TaskStatePending,
   295  		},
   296  	}
   297  
   298  	// Make Consul response
   299  	check := &consulapi.AgentCheck{
   300  		Name:   task.Services[0].Checks[0].Name,
   301  		Status: consulapi.HealthPassing,
   302  	}
   303  	taskRegs := map[string]*agentconsul.ServiceRegistrations{
   304  		task.Name: {
   305  			Services: map[string]*agentconsul.ServiceRegistration{
   306  				task.Services[0].Name: {
   307  					Service: &consulapi.AgentService{
   308  						ID:      "foo",
   309  						Service: task.Services[0].Name,
   310  					},
   311  					Checks: []*consulapi.AgentCheck{check},
   312  				},
   313  			},
   314  		},
   315  	}
   316  
   317  	logger := testlog.HCLogger(t)
   318  	b := cstructs.NewAllocBroadcaster(logger)
   319  	defer b.Close()
   320  
   321  	// Don't reply on the first call
   322  	var called uint64
   323  	consul := consul.NewMockConsulServiceClient(t, logger)
   324  	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   325  		if atomic.AddUint64(&called, 1) == 1 {
   326  			return nil, nil
   327  		}
   328  
   329  		reg := &agentconsul.AllocRegistration{
   330  			Tasks: taskRegs,
   331  		}
   332  
   333  		return reg, nil
   334  	}
   335  
   336  	ctx, cancelFn := context.WithCancel(context.Background())
   337  	defer cancelFn()
   338  
   339  	checkInterval := 10 * time.Millisecond
   340  	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
   341  		time.Millisecond, true)
   342  	tracker.checkLookupInterval = checkInterval
   343  	tracker.Start()
   344  
   345  	// assert that we don't get marked healthy
   346  	select {
   347  	case <-time.After(4 * checkInterval):
   348  		// still unhealthy, good
   349  	case h := <-tracker.HealthyCh():
   350  		require.Fail(t, "unexpected health event", h)
   351  	}
   352  	require.False(t, tracker.tasksHealthy)
   353  	require.False(t, tracker.checksHealthy)
   354  
   355  	// now set task to healthy
   356  	runningAlloc := alloc.Copy()
   357  	runningAlloc.TaskStates = map[string]*structs.TaskState{
   358  		task.Name: {
   359  			State:     structs.TaskStateRunning,
   360  			StartedAt: time.Now(),
   361  		},
   362  		task2.Name: {
   363  			State:     structs.TaskStateRunning,
   364  			StartedAt: time.Now(),
   365  		},
   366  	}
   367  	err := b.Send(runningAlloc)
   368  	require.NoError(t, err)
   369  
   370  	// eventually, it is marked as healthy
   371  	select {
   372  	case <-time.After(4 * checkInterval):
   373  		require.Fail(t, "timed out while waiting for health")
   374  	case h := <-tracker.HealthyCh():
   375  		require.True(t, h)
   376  	}
   377  
   378  }
   379  
   380  func TestTracker_Checks_OnUpdate(t *testing.T) {
   381  	t.Parallel()
   382  
   383  	cases := []struct {
   384  		desc          string
   385  		checkOnUpdate string
   386  		consulResp    string
   387  		expectedPass  bool
   388  	}{
   389  		{
   390  			desc:          "check require_healthy consul healthy",
   391  			checkOnUpdate: structs.OnUpdateRequireHealthy,
   392  			consulResp:    consulapi.HealthPassing,
   393  			expectedPass:  true,
   394  		},
   395  		{
   396  			desc:          "check on_update ignore_warning, consul warn",
   397  			checkOnUpdate: structs.OnUpdateIgnoreWarn,
   398  			consulResp:    consulapi.HealthWarning,
   399  			expectedPass:  true,
   400  		},
   401  		{
   402  			desc:          "check on_update ignore_warning, consul critical",
   403  			checkOnUpdate: structs.OnUpdateIgnoreWarn,
   404  			consulResp:    consulapi.HealthCritical,
   405  			expectedPass:  false,
   406  		},
   407  		{
   408  			desc:          "check on_update ignore_warning, consul healthy",
   409  			checkOnUpdate: structs.OnUpdateIgnoreWarn,
   410  			consulResp:    consulapi.HealthPassing,
   411  			expectedPass:  true,
   412  		},
   413  		{
   414  			desc:          "check on_update ignore, consul critical",
   415  			checkOnUpdate: structs.OnUpdateIgnore,
   416  			consulResp:    consulapi.HealthCritical,
   417  			expectedPass:  true,
   418  		},
   419  	}
   420  
   421  	for _, tc := range cases {
   422  		t.Run(tc.desc, func(t *testing.T) {
   423  
   424  			alloc := mock.Alloc()
   425  			alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
   426  			task := alloc.Job.TaskGroups[0].Tasks[0]
   427  
   428  			// Synthesize running alloc and tasks
   429  			alloc.ClientStatus = structs.AllocClientStatusRunning
   430  			alloc.TaskStates = map[string]*structs.TaskState{
   431  				task.Name: {
   432  					State:     structs.TaskStateRunning,
   433  					StartedAt: time.Now(),
   434  				},
   435  			}
   436  
   437  			// Make Consul response
   438  			check := &consulapi.AgentCheck{
   439  				Name:   task.Services[0].Checks[0].Name,
   440  				Status: tc.consulResp,
   441  			}
   442  			taskRegs := map[string]*agentconsul.ServiceRegistrations{
   443  				task.Name: {
   444  					Services: map[string]*agentconsul.ServiceRegistration{
   445  						task.Services[0].Name: {
   446  							Service: &consulapi.AgentService{
   447  								ID:      "foo",
   448  								Service: task.Services[0].Name,
   449  							},
   450  							Checks: []*consulapi.AgentCheck{check},
   451  							CheckOnUpdate: map[string]string{
   452  								check.CheckID: tc.checkOnUpdate,
   453  							},
   454  						},
   455  					},
   456  				},
   457  			}
   458  
   459  			logger := testlog.HCLogger(t)
   460  			b := cstructs.NewAllocBroadcaster(logger)
   461  			defer b.Close()
   462  
   463  			// Don't reply on the first call
   464  			var called uint64
   465  			consul := consul.NewMockConsulServiceClient(t, logger)
   466  			consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
   467  				if atomic.AddUint64(&called, 1) == 1 {
   468  					return nil, nil
   469  				}
   470  
   471  				reg := &agentconsul.AllocRegistration{
   472  					Tasks: taskRegs,
   473  				}
   474  
   475  				return reg, nil
   476  			}
   477  
   478  			ctx, cancelFn := context.WithCancel(context.Background())
   479  			defer cancelFn()
   480  
   481  			checkInterval := 10 * time.Millisecond
   482  			tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
   483  				time.Millisecond, true)
   484  			tracker.checkLookupInterval = checkInterval
   485  			tracker.Start()
   486  
   487  			select {
   488  			case <-time.After(4 * checkInterval):
   489  				if !tc.expectedPass {
   490  					// tracker should still be running
   491  					require.Nil(t, tracker.ctx.Err())
   492  					return
   493  				}
   494  				require.Fail(t, "timed out while waiting for health")
   495  			case h := <-tracker.HealthyCh():
   496  				require.True(t, h)
   497  			}
   498  
   499  			// For healthy checks, the tracker should stop watching
   500  			select {
   501  			case <-tracker.ctx.Done():
   502  				// Ok, tracker should exit after reporting healthy
   503  			default:
   504  				require.Fail(t, "expected tracker to exit after reporting healthy")
   505  			}
   506  		})
   507  	}
   508  }