github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allochealth/tracker_test.go (about) 1 package allochealth 2 3 import ( 4 "context" 5 "fmt" 6 "sync/atomic" 7 "testing" 8 "time" 9 10 consulapi "github.com/hashicorp/consul/api" 11 "github.com/hashicorp/nomad/client/consul" 12 cstructs "github.com/hashicorp/nomad/client/structs" 13 agentconsul "github.com/hashicorp/nomad/command/agent/consul" 14 "github.com/hashicorp/nomad/helper/testlog" 15 "github.com/hashicorp/nomad/nomad/mock" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/testutil" 18 "github.com/stretchr/testify/require" 19 ) 20 21 func TestTracker_Checks_Healthy(t *testing.T) { 22 t.Parallel() 23 24 alloc := mock.Alloc() 25 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 26 task := alloc.Job.TaskGroups[0].Tasks[0] 27 28 // Synthesize running alloc and tasks 29 alloc.ClientStatus = structs.AllocClientStatusRunning 30 alloc.TaskStates = map[string]*structs.TaskState{ 31 task.Name: { 32 State: structs.TaskStateRunning, 33 StartedAt: time.Now(), 34 }, 35 } 36 37 // Make Consul response 38 check := &consulapi.AgentCheck{ 39 Name: task.Services[0].Checks[0].Name, 40 Status: consulapi.HealthPassing, 41 } 42 taskRegs := map[string]*agentconsul.ServiceRegistrations{ 43 task.Name: { 44 Services: map[string]*agentconsul.ServiceRegistration{ 45 task.Services[0].Name: { 46 Service: &consulapi.AgentService{ 47 ID: "foo", 48 Service: task.Services[0].Name, 49 }, 50 Checks: []*consulapi.AgentCheck{check}, 51 }, 52 }, 53 }, 54 } 55 56 logger := testlog.HCLogger(t) 57 b := cstructs.NewAllocBroadcaster(logger) 58 defer b.Close() 59 60 // Don't reply on the first call 61 var called uint64 62 consul := consul.NewMockConsulServiceClient(t, logger) 63 consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { 64 if atomic.AddUint64(&called, 1) == 1 { 65 return nil, nil 66 } 67 68 reg := &agentconsul.AllocRegistration{ 69 Tasks: taskRegs, 70 } 71 72 return reg, nil 73 } 74 75 ctx, cancelFn := context.WithCancel(context.Background()) 76 defer cancelFn() 77 78 checkInterval := 10 * time.Millisecond 79 tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, 80 time.Millisecond, true) 81 tracker.checkLookupInterval = checkInterval 82 tracker.Start() 83 84 select { 85 case <-time.After(4 * checkInterval): 86 require.Fail(t, "timed out while waiting for health") 87 case h := <-tracker.HealthyCh(): 88 require.True(t, h) 89 } 90 } 91 92 func TestTracker_Checks_Unhealthy(t *testing.T) { 93 t.Parallel() 94 95 alloc := mock.Alloc() 96 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 97 task := alloc.Job.TaskGroups[0].Tasks[0] 98 99 newCheck := task.Services[0].Checks[0].Copy() 100 newCheck.Name = "failing-check" 101 task.Services[0].Checks = append(task.Services[0].Checks, newCheck) 102 103 // Synthesize running alloc and tasks 104 alloc.ClientStatus = structs.AllocClientStatusRunning 105 alloc.TaskStates = map[string]*structs.TaskState{ 106 task.Name: { 107 State: structs.TaskStateRunning, 108 StartedAt: time.Now(), 109 }, 110 } 111 112 // Make Consul response 113 checkHealthy := &consulapi.AgentCheck{ 114 Name: task.Services[0].Checks[0].Name, 115 Status: consulapi.HealthPassing, 116 } 117 checksUnhealthy := &consulapi.AgentCheck{ 118 Name: task.Services[0].Checks[1].Name, 119 Status: consulapi.HealthCritical, 120 } 121 taskRegs := map[string]*agentconsul.ServiceRegistrations{ 122 task.Name: { 123 Services: map[string]*agentconsul.ServiceRegistration{ 124 task.Services[0].Name: { 125 Service: &consulapi.AgentService{ 126 ID: "foo", 127 Service: task.Services[0].Name, 128 }, 129 Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy}, 130 }, 131 }, 132 }, 133 } 134 135 logger := testlog.HCLogger(t) 136 b := cstructs.NewAllocBroadcaster(logger) 137 defer b.Close() 138 139 // Don't reply on the first call 140 var called uint64 141 consul := consul.NewMockConsulServiceClient(t, logger) 142 consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { 143 if atomic.AddUint64(&called, 1) == 1 { 144 return nil, nil 145 } 146 147 reg := &agentconsul.AllocRegistration{ 148 Tasks: taskRegs, 149 } 150 151 return reg, nil 152 } 153 154 ctx, cancelFn := context.WithCancel(context.Background()) 155 defer cancelFn() 156 157 checkInterval := 10 * time.Millisecond 158 tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, 159 time.Millisecond, true) 160 tracker.checkLookupInterval = checkInterval 161 tracker.Start() 162 163 testutil.WaitForResult(func() (bool, error) { 164 lookup := atomic.LoadUint64(&called) 165 return lookup < 4, fmt.Errorf("wait to get more task registration lookups: %v", lookup) 166 }, func(err error) { 167 require.NoError(t, err) 168 }) 169 170 tracker.l.Lock() 171 require.False(t, tracker.checksHealthy) 172 tracker.l.Unlock() 173 174 select { 175 case v := <-tracker.HealthyCh(): 176 require.Failf(t, "expected no health value", " got %v", v) 177 default: 178 // good 179 } 180 } 181 182 func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) { 183 t.Parallel() 184 185 alloc := mock.Alloc() 186 logger := testlog.HCLogger(t) 187 188 ctx, cancelFn := context.WithCancel(context.Background()) 189 defer cancelFn() 190 191 tracker := NewTracker(ctx, logger, alloc, nil, nil, 192 time.Millisecond, true) 193 194 assertNoHealth := func() { 195 require.NoError(t, tracker.ctx.Err()) 196 select { 197 case v := <-tracker.HealthyCh(): 198 require.Failf(t, "unexpected healthy event", "got %v", v) 199 default: 200 } 201 } 202 203 // first set task health without checks 204 tracker.setTaskHealth(true, false) 205 assertNoHealth() 206 207 // now fail task health again before checks are successful 208 tracker.setTaskHealth(false, false) 209 assertNoHealth() 210 211 // now pass health checks - do not propagate health yet 212 tracker.setCheckHealth(true) 213 assertNoHealth() 214 215 // set tasks to healthy - don't propagate health yet, wait for the next check 216 tracker.setTaskHealth(true, false) 217 assertNoHealth() 218 219 // set checks to true, now propagate health status 220 tracker.setCheckHealth(true) 221 222 require.Error(t, tracker.ctx.Err()) 223 select { 224 case v := <-tracker.HealthyCh(): 225 require.True(t, v) 226 default: 227 require.Fail(t, "expected a health status") 228 } 229 } 230 231 // TestTracker_Checks_Healthy_Before_TaskHealth asserts that we mark an alloc 232 // healthy, if the checks pass before task health pass 233 func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) { 234 t.Parallel() 235 236 alloc := mock.Alloc() 237 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 238 task := alloc.Job.TaskGroups[0].Tasks[0] 239 240 // new task starting unhealthy, without services 241 task2 := task.Copy() 242 task2.Name = task2.Name + "2" 243 task2.Services = nil 244 alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2) 245 246 // Synthesize running alloc and tasks 247 alloc.ClientStatus = structs.AllocClientStatusRunning 248 alloc.TaskStates = map[string]*structs.TaskState{ 249 task.Name: { 250 State: structs.TaskStateRunning, 251 StartedAt: time.Now(), 252 }, 253 task2.Name: { 254 State: structs.TaskStatePending, 255 }, 256 } 257 258 // Make Consul response 259 check := &consulapi.AgentCheck{ 260 Name: task.Services[0].Checks[0].Name, 261 Status: consulapi.HealthPassing, 262 } 263 taskRegs := map[string]*agentconsul.ServiceRegistrations{ 264 task.Name: { 265 Services: map[string]*agentconsul.ServiceRegistration{ 266 task.Services[0].Name: { 267 Service: &consulapi.AgentService{ 268 ID: "foo", 269 Service: task.Services[0].Name, 270 }, 271 Checks: []*consulapi.AgentCheck{check}, 272 }, 273 }, 274 }, 275 } 276 277 logger := testlog.HCLogger(t) 278 b := cstructs.NewAllocBroadcaster(logger) 279 defer b.Close() 280 281 // Don't reply on the first call 282 var called uint64 283 consul := consul.NewMockConsulServiceClient(t, logger) 284 consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { 285 if atomic.AddUint64(&called, 1) == 1 { 286 return nil, nil 287 } 288 289 reg := &agentconsul.AllocRegistration{ 290 Tasks: taskRegs, 291 } 292 293 return reg, nil 294 } 295 296 ctx, cancelFn := context.WithCancel(context.Background()) 297 defer cancelFn() 298 299 checkInterval := 10 * time.Millisecond 300 tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, 301 time.Millisecond, true) 302 tracker.checkLookupInterval = checkInterval 303 tracker.Start() 304 305 // assert that we don't get marked healthy 306 select { 307 case <-time.After(4 * checkInterval): 308 // still unhealthy, good 309 case h := <-tracker.HealthyCh(): 310 require.Fail(t, "unexpected health event", h) 311 } 312 require.False(t, tracker.tasksHealthy) 313 require.False(t, tracker.checksHealthy) 314 315 // now set task to healthy 316 runningAlloc := alloc.Copy() 317 runningAlloc.TaskStates = map[string]*structs.TaskState{ 318 task.Name: { 319 State: structs.TaskStateRunning, 320 StartedAt: time.Now(), 321 }, 322 task2.Name: { 323 State: structs.TaskStateRunning, 324 StartedAt: time.Now(), 325 }, 326 } 327 err := b.Send(runningAlloc) 328 require.NoError(t, err) 329 330 // eventually, it is marked as healthy 331 select { 332 case <-time.After(4 * checkInterval): 333 require.Fail(t, "timed out while waiting for health") 334 case h := <-tracker.HealthyCh(): 335 require.True(t, h) 336 } 337 338 }