github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/alloc_runner_hooks.go (about) 1 package allocrunner 2 3 import ( 4 "fmt" 5 "time" 6 7 multierror "github.com/hashicorp/go-multierror" 8 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 9 clientconfig "github.com/hashicorp/nomad/client/config" 10 cstructs "github.com/hashicorp/nomad/client/structs" 11 "github.com/hashicorp/nomad/client/taskenv" 12 "github.com/hashicorp/nomad/nomad/structs" 13 ) 14 15 type hookResourceSetter interface { 16 GetAllocHookResources() *cstructs.AllocHookResources 17 SetAllocHookResources(*cstructs.AllocHookResources) 18 } 19 20 type allocHookResourceSetter struct { 21 ar *allocRunner 22 } 23 24 func (a *allocHookResourceSetter) GetAllocHookResources() *cstructs.AllocHookResources { 25 a.ar.hookStateMu.RLock() 26 defer a.ar.hookStateMu.RUnlock() 27 28 return a.ar.hookState 29 } 30 31 func (a *allocHookResourceSetter) SetAllocHookResources(res *cstructs.AllocHookResources) { 32 a.ar.hookStateMu.Lock() 33 defer a.ar.hookStateMu.Unlock() 34 35 a.ar.hookState = res 36 37 // Propagate to all of the TRs within the lock to ensure consistent state. 38 // TODO: Refactor so TR's pull state from AR? 39 for _, tr := range a.ar.tasks { 40 tr.SetAllocHookResources(res) 41 } 42 } 43 44 // allocHealthSetter is a shim to allow the alloc health watcher hook to set 45 // and clear the alloc health without full access to the alloc runner state 46 type allocHealthSetter struct { 47 ar *allocRunner 48 } 49 50 // HasHealth returns true if a deployment status is already set. 51 func (a *allocHealthSetter) HasHealth() bool { 52 a.ar.stateLock.Lock() 53 defer a.ar.stateLock.Unlock() 54 return a.ar.state.DeploymentStatus.HasHealth() 55 } 56 57 // ClearHealth allows the health watcher hook to clear the alloc's deployment 58 // health if the deployment id changes. It does not update the server as the 59 // status is only cleared when already receiving an update from the server. 60 // 61 // Only for use by health hook. 62 func (a *allocHealthSetter) ClearHealth() { 63 a.ar.stateLock.Lock() 64 a.ar.state.ClearDeploymentStatus() 65 a.ar.persistDeploymentStatus(nil) 66 a.ar.stateLock.Unlock() 67 } 68 69 // SetHealth allows the health watcher hook to set the alloc's 70 // deployment/migration health and emit task events. 71 // 72 // Only for use by health hook. 73 func (a *allocHealthSetter) SetHealth(healthy, isDeploy bool, trackerTaskEvents map[string]*structs.TaskEvent) { 74 // Updating alloc deployment state is tricky because it may be nil, but 75 // if it's not then we need to maintain the values of Canary and 76 // ModifyIndex as they're only mutated by the server. 77 a.ar.stateLock.Lock() 78 a.ar.state.SetDeploymentStatus(time.Now(), healthy) 79 a.ar.persistDeploymentStatus(a.ar.state.DeploymentStatus) 80 terminalDesiredState := a.ar.Alloc().ServerTerminalStatus() 81 a.ar.stateLock.Unlock() 82 83 // If deployment is unhealthy emit task events explaining why 84 if !healthy && isDeploy && !terminalDesiredState { 85 for task, event := range trackerTaskEvents { 86 if tr, ok := a.ar.tasks[task]; ok { 87 // Append but don't emit event since the server 88 // will be updated below 89 tr.AppendEvent(event) 90 } 91 } 92 } 93 94 // Gather the state of the other tasks 95 states := make(map[string]*structs.TaskState, len(a.ar.tasks)) 96 for name, tr := range a.ar.tasks { 97 states[name] = tr.TaskState() 98 } 99 100 // Build the client allocation 101 calloc := a.ar.clientAlloc(states) 102 103 // Update the server 104 a.ar.stateUpdater.AllocStateUpdated(calloc) 105 106 // Broadcast client alloc to listeners 107 a.ar.allocBroadcaster.Send(calloc) 108 } 109 110 // initRunnerHooks initializes the runners hooks. 111 func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { 112 hookLogger := ar.logger.Named("runner_hook") 113 114 // create health setting shim 115 hs := &allocHealthSetter{ar} 116 117 // create network isolation setting shim 118 ns := &allocNetworkIsolationSetter{ar: ar} 119 120 // create hook resource setting shim 121 hrs := &allocHookResourceSetter{ar: ar} 122 hrs.SetAllocHookResources(&cstructs.AllocHookResources{}) 123 124 // build the network manager 125 nm, err := newNetworkManager(ar.Alloc(), ar.driverManager) 126 if err != nil { 127 return fmt.Errorf("failed to configure network manager: %v", err) 128 } 129 130 // create network configurator 131 nc, err := newNetworkConfigurator(hookLogger, ar.Alloc(), config) 132 if err != nil { 133 return fmt.Errorf("failed to initialize network configurator: %v", err) 134 } 135 136 // Create a new taskenv.Builder which is used and mutated by networkHook. 137 envBuilder := taskenv.NewBuilder( 138 config.Node, ar.Alloc(), nil, config.Region).SetAllocDir(ar.allocDir.AllocDir) 139 140 // Create a taskenv.TaskEnv which is used for read only purposes by the 141 // newNetworkHook. 142 builtTaskEnv := envBuilder.Build() 143 144 // Create the alloc directory hook. This is run first to ensure the 145 // directory path exists for other hooks. 146 alloc := ar.Alloc() 147 ar.runnerHooks = []interfaces.RunnerHook{ 148 newAllocDirHook(hookLogger, ar.allocDir), 149 newCgroupHook(ar.Alloc(), ar.cpusetManager), 150 newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher), 151 newDiskMigrationHook(hookLogger, ar.prevAllocMigrator, ar.allocDir), 152 newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient, ar.checkStore), 153 newNetworkHook(hookLogger, ns, alloc, nm, nc, ar, builtTaskEnv), 154 newGroupServiceHook(groupServiceHookConfig{ 155 alloc: alloc, 156 providerNamespace: alloc.ServiceProviderNamespace(), 157 serviceRegWrapper: ar.serviceRegWrapper, 158 restarter: ar, 159 taskEnvBuilder: envBuilder, 160 networkStatus: ar, 161 logger: hookLogger, 162 shutdownDelayCtx: ar.shutdownDelayCtx, 163 }), 164 newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig, config.Node.Attributes), 165 newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), 166 newCSIHook(alloc, hookLogger, ar.csiManager, ar.rpcClient, ar, hrs, ar.clientConfig.Node.SecretID), 167 newChecksHook(hookLogger, alloc, ar.checkStore, ar), 168 } 169 170 return nil 171 } 172 173 // prerun is used to run the runners prerun hooks. 174 func (ar *allocRunner) prerun() error { 175 if ar.logger.IsTrace() { 176 start := time.Now() 177 ar.logger.Trace("running pre-run hooks", "start", start) 178 defer func() { 179 end := time.Now() 180 ar.logger.Trace("finished pre-run hooks", "end", end, "duration", end.Sub(start)) 181 }() 182 } 183 184 for _, hook := range ar.runnerHooks { 185 pre, ok := hook.(interfaces.RunnerPrerunHook) 186 if !ok { 187 continue 188 } 189 190 name := pre.Name() 191 var start time.Time 192 if ar.logger.IsTrace() { 193 start = time.Now() 194 ar.logger.Trace("running pre-run hook", "name", name, "start", start) 195 } 196 197 if err := pre.Prerun(); err != nil { 198 return fmt.Errorf("pre-run hook %q failed: %v", name, err) 199 } 200 201 if ar.logger.IsTrace() { 202 end := time.Now() 203 ar.logger.Trace("finished pre-run hook", "name", name, "end", end, "duration", end.Sub(start)) 204 } 205 } 206 207 return nil 208 } 209 210 // update runs the alloc runner update hooks. Update hooks are run 211 // asynchronously with all other alloc runner operations. 212 func (ar *allocRunner) update(update *structs.Allocation) error { 213 if ar.logger.IsTrace() { 214 start := time.Now() 215 ar.logger.Trace("running update hooks", "start", start) 216 defer func() { 217 end := time.Now() 218 ar.logger.Trace("finished update hooks", "end", end, "duration", end.Sub(start)) 219 }() 220 } 221 222 req := &interfaces.RunnerUpdateRequest{ 223 Alloc: update, 224 } 225 226 var merr multierror.Error 227 for _, hook := range ar.runnerHooks { 228 h, ok := hook.(interfaces.RunnerUpdateHook) 229 if !ok { 230 continue 231 } 232 233 name := h.Name() 234 var start time.Time 235 if ar.logger.IsTrace() { 236 start = time.Now() 237 ar.logger.Trace("running update hook", "name", name, "start", start) 238 } 239 240 if err := h.Update(req); err != nil { 241 merr.Errors = append(merr.Errors, fmt.Errorf("update hook %q failed: %v", name, err)) 242 } 243 244 if ar.logger.IsTrace() { 245 end := time.Now() 246 ar.logger.Trace("finished update hooks", "name", name, "end", end, "duration", end.Sub(start)) 247 } 248 } 249 250 return merr.ErrorOrNil() 251 } 252 253 // postrun is used to run the runners postrun hooks. 254 func (ar *allocRunner) postrun() error { 255 if ar.logger.IsTrace() { 256 start := time.Now() 257 ar.logger.Trace("running post-run hooks", "start", start) 258 defer func() { 259 end := time.Now() 260 ar.logger.Trace("finished post-run hooks", "end", end, "duration", end.Sub(start)) 261 }() 262 } 263 264 for _, hook := range ar.runnerHooks { 265 post, ok := hook.(interfaces.RunnerPostrunHook) 266 if !ok { 267 continue 268 } 269 270 name := post.Name() 271 var start time.Time 272 if ar.logger.IsTrace() { 273 start = time.Now() 274 ar.logger.Trace("running post-run hook", "name", name, "start", start) 275 } 276 277 if err := post.Postrun(); err != nil { 278 return fmt.Errorf("hook %q failed: %v", name, err) 279 } 280 281 if ar.logger.IsTrace() { 282 end := time.Now() 283 ar.logger.Trace("finished post-run hooks", "name", name, "end", end, "duration", end.Sub(start)) 284 } 285 } 286 287 return nil 288 } 289 290 // destroy is used to run the runners destroy hooks. All hooks are run and 291 // errors are returned as a multierror. 292 func (ar *allocRunner) destroy() error { 293 if ar.logger.IsTrace() { 294 start := time.Now() 295 ar.logger.Trace("running destroy hooks", "start", start) 296 defer func() { 297 end := time.Now() 298 ar.logger.Trace("finished destroy hooks", "end", end, "duration", end.Sub(start)) 299 }() 300 } 301 302 var merr multierror.Error 303 for _, hook := range ar.runnerHooks { 304 h, ok := hook.(interfaces.RunnerDestroyHook) 305 if !ok { 306 continue 307 } 308 309 name := h.Name() 310 var start time.Time 311 if ar.logger.IsTrace() { 312 start = time.Now() 313 ar.logger.Trace("running destroy hook", "name", name, "start", start) 314 } 315 316 if err := h.Destroy(); err != nil { 317 merr.Errors = append(merr.Errors, fmt.Errorf("destroy hook %q failed: %v", name, err)) 318 } 319 320 if ar.logger.IsTrace() { 321 end := time.Now() 322 ar.logger.Trace("finished destroy hooks", "name", name, "end", end, "duration", end.Sub(start)) 323 } 324 } 325 326 return merr.ErrorOrNil() 327 } 328 329 func (ar *allocRunner) preKillHooks() { 330 for _, hook := range ar.runnerHooks { 331 pre, ok := hook.(interfaces.RunnerPreKillHook) 332 if !ok { 333 continue 334 } 335 336 name := pre.Name() 337 var start time.Time 338 if ar.logger.IsTrace() { 339 start = time.Now() 340 ar.logger.Trace("running alloc pre shutdown hook", "name", name, "start", start) 341 } 342 343 pre.PreKill() 344 345 if ar.logger.IsTrace() { 346 end := time.Now() 347 ar.logger.Trace("finished alloc pre shutdown hook", "name", name, "end", end, "duration", end.Sub(start)) 348 } 349 } 350 } 351 352 // shutdownHooks calls graceful shutdown hooks for when the agent is exiting. 353 func (ar *allocRunner) shutdownHooks() { 354 for _, hook := range ar.runnerHooks { 355 sh, ok := hook.(interfaces.ShutdownHook) 356 if !ok { 357 continue 358 } 359 360 name := sh.Name() 361 var start time.Time 362 if ar.logger.IsTrace() { 363 start = time.Now() 364 ar.logger.Trace("running shutdown hook", "name", name, "start", start) 365 } 366 367 sh.Shutdown() 368 369 if ar.logger.IsTrace() { 370 end := time.Now() 371 ar.logger.Trace("finished shutdown hooks", "name", name, "end", end, "duration", end.Sub(start)) 372 } 373 } 374 } 375 376 func (ar *allocRunner) taskRestartHooks() { 377 for _, hook := range ar.runnerHooks { 378 re, ok := hook.(interfaces.RunnerTaskRestartHook) 379 if !ok { 380 continue 381 } 382 383 name := re.Name() 384 var start time.Time 385 if ar.logger.IsTrace() { 386 start = time.Now() 387 ar.logger.Trace("running alloc task restart hook", 388 "name", name, "start", start) 389 } 390 391 re.PreTaskRestart() 392 393 if ar.logger.IsTrace() { 394 end := time.Now() 395 ar.logger.Trace("finished alloc task restart hook", 396 "name", name, "end", end, "duration", end.Sub(start)) 397 } 398 } 399 }