github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/health_hook.go (about) 1 package allocrunner 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 "time" 8 9 log "github.com/hashicorp/go-hclog" 10 "github.com/hashicorp/nomad/client/allochealth" 11 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 12 "github.com/hashicorp/nomad/client/consul" 13 cstructs "github.com/hashicorp/nomad/client/structs" 14 "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 // healthMutator is able to set/clear alloc health. 18 type healthSetter interface { 19 // HasHealth returns true if health is already set. 20 HasHealth() bool 21 22 // Set health via the mutator 23 SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent) 24 25 // Clear health when the deployment ID changes 26 ClearHealth() 27 } 28 29 // allocHealthWatcherHook is responsible for watching an allocation's task 30 // status and (optionally) Consul health check status to determine if the 31 // allocation is health or unhealthy. Used by deployments and migrations. 32 type allocHealthWatcherHook struct { 33 healthSetter healthSetter 34 35 // consul client used to monitor health checks 36 consul consul.ConsulServiceAPI 37 38 // listener is given to trackers to listen for alloc updates and closed 39 // when the alloc is destroyed. 40 listener *cstructs.AllocListener 41 42 // hookLock is held by hook methods to prevent concurrent access by 43 // Update and synchronous hooks. 44 hookLock sync.Mutex 45 46 // watchDone is created before calling watchHealth and is closed when 47 // watchHealth exits. Must be passed into watchHealth to avoid races. 48 // Initialized already closed as Update may be called before Prerun. 49 watchDone chan struct{} 50 51 // ranOnce is set once Prerun or Update have run at least once. This 52 // prevents Prerun from running if an Update has already been 53 // processed. Must hold hookLock to access. 54 ranOnce bool 55 56 // cancelFn stops the health watching/setting goroutine. Wait on 57 // watchLock to block until the watcher exits. 58 cancelFn context.CancelFunc 59 60 // alloc set by new func or Update. Must hold hookLock to access. 61 alloc *structs.Allocation 62 63 // isDeploy is true if monitoring a deployment. Set in init(). Must 64 // hold hookLock to access. 65 isDeploy bool 66 67 logger log.Logger 68 } 69 70 func newAllocHealthWatcherHook(logger log.Logger, alloc *structs.Allocation, hs healthSetter, 71 listener *cstructs.AllocListener, consul consul.ConsulServiceAPI) interfaces.RunnerHook { 72 73 // Neither deployments nor migrations care about the health of 74 // non-service jobs so never watch their health 75 if alloc.Job.Type != structs.JobTypeService { 76 return noopAllocHealthWatcherHook{} 77 } 78 79 // Initialize watchDone with a closed chan in case Update runs before Prerun 80 closedDone := make(chan struct{}) 81 close(closedDone) 82 83 h := &allocHealthWatcherHook{ 84 alloc: alloc, 85 cancelFn: func() {}, // initialize to prevent nil func panics 86 watchDone: closedDone, 87 consul: consul, 88 healthSetter: hs, 89 listener: listener, 90 } 91 92 h.logger = logger.Named(h.Name()) 93 return h 94 } 95 96 func (h *allocHealthWatcherHook) Name() string { 97 return "alloc_health_watcher" 98 } 99 100 // init starts the allochealth.Tracker and watchHealth goroutine on either 101 // Prerun or Update. Caller must set/update alloc and logger fields. 102 // 103 // Not threadsafe so the caller should lock since Updates occur concurrently. 104 func (h *allocHealthWatcherHook) init() error { 105 // No need to watch health as it's already set 106 if h.healthSetter.HasHealth() { 107 h.logger.Trace("not watching; already has health set") 108 return nil 109 } 110 111 tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) 112 if tg == nil { 113 return fmt.Errorf("task group %q does not exist in job %q", h.alloc.TaskGroup, h.alloc.Job.ID) 114 } 115 116 h.isDeploy = h.alloc.DeploymentID != "" 117 118 // No need to watch allocs for deployments that rely on operators 119 // manually setting health 120 if h.isDeploy && (tg.Update.IsEmpty() || tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Manual) { 121 return nil 122 } 123 124 // Define the deadline, health method, min healthy time from the 125 // deployment if this is a deployment; otherwise from the migration 126 // strategy. 127 deadline, useChecks, minHealthyTime := getHealthParams(time.Now(), tg, h.isDeploy) 128 129 // Create a context that is canceled when the tracker should shutdown. 130 ctx := context.Background() 131 ctx, h.cancelFn = context.WithCancel(ctx) 132 133 h.logger.Trace("watching", "deadline", deadline, "checks", useChecks, "min_healthy_time", minHealthyTime) 134 // Create a new tracker, start it, and watch for health results. 135 tracker := allochealth.NewTracker(ctx, h.logger, h.alloc, 136 h.listener, h.consul, minHealthyTime, useChecks) 137 tracker.Start() 138 139 // Create a new done chan and start watching for health updates 140 h.watchDone = make(chan struct{}) 141 go h.watchHealth(ctx, deadline, tracker, h.watchDone) 142 return nil 143 } 144 145 func (h *allocHealthWatcherHook) Prerun() error { 146 h.hookLock.Lock() 147 defer h.hookLock.Unlock() 148 149 if h.ranOnce { 150 // An Update beat Prerun to running the watcher; noop 151 return nil 152 } 153 154 h.ranOnce = true 155 return h.init() 156 } 157 158 func (h *allocHealthWatcherHook) Update(req *interfaces.RunnerUpdateRequest) error { 159 h.hookLock.Lock() 160 defer h.hookLock.Unlock() 161 162 // Prevent Prerun from running after an Update 163 h.ranOnce = true 164 165 // Cancel the old watcher and create a new one 166 h.cancelFn() 167 168 // Wait until the watcher exits 169 <-h.watchDone 170 171 // Deployment has changed, reset status 172 if req.Alloc.DeploymentID != h.alloc.DeploymentID { 173 h.healthSetter.ClearHealth() 174 } 175 176 // Update alloc 177 h.alloc = req.Alloc 178 179 return h.init() 180 } 181 182 func (h *allocHealthWatcherHook) Postrun() error { 183 h.hookLock.Lock() 184 defer h.hookLock.Unlock() 185 186 h.cancelFn() 187 h.listener.Close() 188 189 // Wait until the watcher exits 190 <-h.watchDone 191 192 return nil 193 } 194 195 func (h *allocHealthWatcherHook) Shutdown() { 196 // Same as Postrun 197 h.Postrun() 198 } 199 200 // watchHealth watches alloc health until it is set, the alloc is stopped, the 201 // deadline is reached, or the context is canceled. watchHealth will be 202 // canceled and restarted on Updates so calls are serialized with a lock. 203 func (h *allocHealthWatcherHook) watchHealth(ctx context.Context, deadline time.Time, tracker *allochealth.Tracker, done chan<- struct{}) { 204 defer close(done) 205 206 // Default to unhealthy for the deadline reached case 207 healthy := false 208 209 select { 210 case <-ctx.Done(): 211 // Graceful shutdown 212 return 213 214 case <-tracker.AllocStoppedCh(): 215 // Allocation has stopped so no need to set health 216 return 217 218 case <-time.After(deadline.Sub(time.Now())): 219 // Time is up! Fallthrough to set unhealthy. 220 h.logger.Trace("deadline reached; setting unhealthy", "deadline", deadline) 221 222 case healthy = <-tracker.HealthyCh(): 223 // Health received. Fallthrough to set it. 224 } 225 226 h.logger.Trace("health set", "healthy", healthy) 227 228 // If this is an unhealthy deployment emit events for tasks 229 var taskEvents map[string]*structs.TaskEvent 230 if !healthy && h.isDeploy { 231 taskEvents = tracker.TaskEvents() 232 } 233 234 h.healthSetter.SetHealth(healthy, h.isDeploy, taskEvents) 235 } 236 237 // getHealthParams returns the health watcher parameters which vary based on 238 // whether this allocation is in a deployment or migration. 239 func getHealthParams(now time.Time, tg *structs.TaskGroup, isDeploy bool) (deadline time.Time, useChecks bool, minHealthyTime time.Duration) { 240 if isDeploy { 241 deadline = now.Add(tg.Update.HealthyDeadline) 242 minHealthyTime = tg.Update.MinHealthyTime 243 useChecks = tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks 244 } else { 245 strategy := tg.Migrate 246 if strategy == nil { 247 // For backwards compat with pre-0.8 allocations that 248 // don't have a migrate strategy set. 249 strategy = structs.DefaultMigrateStrategy() 250 } 251 252 deadline = now.Add(strategy.HealthyDeadline) 253 minHealthyTime = strategy.MinHealthyTime 254 useChecks = strategy.HealthCheck == structs.MigrateStrategyHealthChecks 255 } 256 return 257 } 258 259 // noopAllocHealthWatcherHook is an empty hook implementation returned by 260 // newAllocHealthWatcherHook when an allocation will never need its health 261 // monitored. 262 type noopAllocHealthWatcherHook struct{} 263 264 func (noopAllocHealthWatcherHook) Name() string { 265 return "alloc_health_watcher" 266 }