github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/alloc_runner_hooks.go

github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/alloc_runner_hooks.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	multierror "github.com/hashicorp/go-multierror"
     8  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
     9  	clientconfig "github.com/hashicorp/nomad/client/config"
    10  	cstructs "github.com/hashicorp/nomad/client/structs"
    11  	"github.com/hashicorp/nomad/client/taskenv"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  )
    14  
// hookResourceSetter is the accessor pair for the AllocHookResources value
// that alloc runner hooks read and replace.
type hookResourceSetter interface {
	// GetAllocHookResources returns the currently stored hook resources.
	GetAllocHookResources() *cstructs.AllocHookResources
	// SetAllocHookResources replaces the stored hook resources.
	SetAllocHookResources(*cstructs.AllocHookResources)
}
    19  
// allocHookResourceSetter is a shim that reads and writes the hook resources
// stored on an allocRunner.
type allocHookResourceSetter struct {
	// ar is the alloc runner whose hookState is accessed.
	ar *allocRunner
}
    23  
    24  func (a *allocHookResourceSetter) GetAllocHookResources() *cstructs.AllocHookResources {
    25  	a.ar.hookStateMu.RLock()
    26  	defer a.ar.hookStateMu.RUnlock()
    27  
    28  	return a.ar.hookState
    29  }
    30  
    31  func (a *allocHookResourceSetter) SetAllocHookResources(res *cstructs.AllocHookResources) {
    32  	a.ar.hookStateMu.Lock()
    33  	defer a.ar.hookStateMu.Unlock()
    34  
    35  	a.ar.hookState = res
    36  
    37  	// Propagate to all of the TRs within the lock to ensure consistent state.
    38  	// TODO: Refactor so TR's pull state from AR?
    39  	for _, tr := range a.ar.tasks {
    40  		tr.SetAllocHookResources(res)
    41  	}
    42  }
    43  
// allocHealthSetter is a shim to allow the alloc health watcher hook to set
// and clear the alloc health without full access to the alloc runner state.
type allocHealthSetter struct {
	// ar is the alloc runner whose deployment status is read and mutated.
	ar *allocRunner
}
    49  
    50  // HasHealth returns true if a deployment status is already set.
    51  func (a *allocHealthSetter) HasHealth() bool {
    52  	a.ar.stateLock.Lock()
    53  	defer a.ar.stateLock.Unlock()
    54  	return a.ar.state.DeploymentStatus.HasHealth()
    55  }
    56  
    57  // ClearHealth allows the health watcher hook to clear the alloc's deployment
    58  // health if the deployment id changes. It does not update the server as the
    59  // status is only cleared when already receiving an update from the server.
    60  //
    61  // Only for use by health hook.
    62  func (a *allocHealthSetter) ClearHealth() {
    63  	a.ar.stateLock.Lock()
    64  	a.ar.state.ClearDeploymentStatus()
    65  	a.ar.persistDeploymentStatus(nil)
    66  	a.ar.stateLock.Unlock()
    67  }
    68  
    69  // SetHealth allows the health watcher hook to set the alloc's
    70  // deployment/migration health and emit task events.
    71  //
    72  // Only for use by health hook.
    73  func (a *allocHealthSetter) SetHealth(healthy, isDeploy bool, trackerTaskEvents map[string]*structs.TaskEvent) {
    74  	// Updating alloc deployment state is tricky because it may be nil, but
    75  	// if it's not then we need to maintain the values of Canary and
    76  	// ModifyIndex as they're only mutated by the server.
    77  	a.ar.stateLock.Lock()
    78  	a.ar.state.SetDeploymentStatus(time.Now(), healthy)
    79  	a.ar.persistDeploymentStatus(a.ar.state.DeploymentStatus)
    80  	terminalDesiredState := a.ar.Alloc().ServerTerminalStatus()
    81  	a.ar.stateLock.Unlock()
    82  
    83  	// If deployment is unhealthy emit task events explaining why
    84  	if !healthy && isDeploy && !terminalDesiredState {
    85  		for task, event := range trackerTaskEvents {
    86  			if tr, ok := a.ar.tasks[task]; ok {
    87  				// Append but don't emit event since the server
    88  				// will be updated below
    89  				tr.AppendEvent(event)
    90  			}
    91  		}
    92  	}
    93  
    94  	// Gather the state of the other tasks
    95  	states := make(map[string]*structs.TaskState, len(a.ar.tasks))
    96  	for name, tr := range a.ar.tasks {
    97  		states[name] = tr.TaskState()
    98  	}
    99  
   100  	// Build the client allocation
   101  	calloc := a.ar.clientAlloc(states)
   102  
   103  	// Update the server
   104  	a.ar.stateUpdater.AllocStateUpdated(calloc)
   105  
   106  	// Broadcast client alloc to listeners
   107  	a.ar.allocBroadcaster.Send(calloc)
   108  }
   109  
   110  // initRunnerHooks initializes the runners hooks.
   111  func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
   112  	hookLogger := ar.logger.Named("runner_hook")
   113  
   114  	// create health setting shim
   115  	hs := &allocHealthSetter{ar}
   116  
   117  	// create network isolation setting shim
   118  	ns := &allocNetworkIsolationSetter{ar: ar}
   119  
   120  	// create hook resource setting shim
   121  	hrs := &allocHookResourceSetter{ar: ar}
   122  	hrs.SetAllocHookResources(&cstructs.AllocHookResources{})
   123  
   124  	// build the network manager
   125  	nm, err := newNetworkManager(ar.Alloc(), ar.driverManager)
   126  	if err != nil {
   127  		return fmt.Errorf("failed to configure network manager: %v", err)
   128  	}
   129  
   130  	// create network configurator
   131  	nc, err := newNetworkConfigurator(hookLogger, ar.Alloc(), config)
   132  	if err != nil {
   133  		return fmt.Errorf("failed to initialize network configurator: %v", err)
   134  	}
   135  
   136  	// Create a new taskenv.Builder which is used and mutated by networkHook.
   137  	envBuilder := taskenv.NewBuilder(
   138  		config.Node, ar.Alloc(), nil, config.Region).SetAllocDir(ar.allocDir.AllocDir)
   139  
   140  	// Create a taskenv.TaskEnv which is used for read only purposes by the
   141  	// newNetworkHook.
   142  	builtTaskEnv := envBuilder.Build()
   143  
   144  	// Create the alloc directory hook. This is run first to ensure the
   145  	// directory path exists for other hooks.
   146  	alloc := ar.Alloc()
   147  	ar.runnerHooks = []interfaces.RunnerHook{
   148  		newAllocDirHook(hookLogger, ar.allocDir),
   149  		newCgroupHook(ar.Alloc(), ar.cpusetManager),
   150  		newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher),
   151  		newDiskMigrationHook(hookLogger, ar.prevAllocMigrator, ar.allocDir),
   152  		newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient, ar.checkStore),
   153  		newNetworkHook(hookLogger, ns, alloc, nm, nc, ar, builtTaskEnv),
   154  		newGroupServiceHook(groupServiceHookConfig{
   155  			alloc:             alloc,
   156  			providerNamespace: alloc.ServiceProviderNamespace(),
   157  			serviceRegWrapper: ar.serviceRegWrapper,
   158  			restarter:         ar,
   159  			taskEnvBuilder:    envBuilder,
   160  			networkStatus:     ar,
   161  			logger:            hookLogger,
   162  			shutdownDelayCtx:  ar.shutdownDelayCtx,
   163  		}),
   164  		newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig, config.Node.Attributes),
   165  		newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
   166  		newCSIHook(alloc, hookLogger, ar.csiManager, ar.rpcClient, ar, hrs, ar.clientConfig.Node.SecretID),
   167  		newChecksHook(hookLogger, alloc, ar.checkStore, ar),
   168  	}
   169  
   170  	return nil
   171  }
   172  
   173  // prerun is used to run the runners prerun hooks.
   174  func (ar *allocRunner) prerun() error {
   175  	if ar.logger.IsTrace() {
   176  		start := time.Now()
   177  		ar.logger.Trace("running pre-run hooks", "start", start)
   178  		defer func() {
   179  			end := time.Now()
   180  			ar.logger.Trace("finished pre-run hooks", "end", end, "duration", end.Sub(start))
   181  		}()
   182  	}
   183  
   184  	for _, hook := range ar.runnerHooks {
   185  		pre, ok := hook.(interfaces.RunnerPrerunHook)
   186  		if !ok {
   187  			continue
   188  		}
   189  
   190  		name := pre.Name()
   191  		var start time.Time
   192  		if ar.logger.IsTrace() {
   193  			start = time.Now()
   194  			ar.logger.Trace("running pre-run hook", "name", name, "start", start)
   195  		}
   196  
   197  		if err := pre.Prerun(); err != nil {
   198  			return fmt.Errorf("pre-run hook %q failed: %v", name, err)
   199  		}
   200  
   201  		if ar.logger.IsTrace() {
   202  			end := time.Now()
   203  			ar.logger.Trace("finished pre-run hook", "name", name, "end", end, "duration", end.Sub(start))
   204  		}
   205  	}
   206  
   207  	return nil
   208  }
   209  
   210  // update runs the alloc runner update hooks. Update hooks are run
   211  // asynchronously with all other alloc runner operations.
   212  func (ar *allocRunner) update(update *structs.Allocation) error {
   213  	if ar.logger.IsTrace() {
   214  		start := time.Now()
   215  		ar.logger.Trace("running update hooks", "start", start)
   216  		defer func() {
   217  			end := time.Now()
   218  			ar.logger.Trace("finished update hooks", "end", end, "duration", end.Sub(start))
   219  		}()
   220  	}
   221  
   222  	req := &interfaces.RunnerUpdateRequest{
   223  		Alloc: update,
   224  	}
   225  
   226  	var merr multierror.Error
   227  	for _, hook := range ar.runnerHooks {
   228  		h, ok := hook.(interfaces.RunnerUpdateHook)
   229  		if !ok {
   230  			continue
   231  		}
   232  
   233  		name := h.Name()
   234  		var start time.Time
   235  		if ar.logger.IsTrace() {
   236  			start = time.Now()
   237  			ar.logger.Trace("running update hook", "name", name, "start", start)
   238  		}
   239  
   240  		if err := h.Update(req); err != nil {
   241  			merr.Errors = append(merr.Errors, fmt.Errorf("update hook %q failed: %v", name, err))
   242  		}
   243  
   244  		if ar.logger.IsTrace() {
   245  			end := time.Now()
   246  			ar.logger.Trace("finished update hooks", "name", name, "end", end, "duration", end.Sub(start))
   247  		}
   248  	}
   249  
   250  	return merr.ErrorOrNil()
   251  }
   252  
   253  // postrun is used to run the runners postrun hooks.
   254  func (ar *allocRunner) postrun() error {
   255  	if ar.logger.IsTrace() {
   256  		start := time.Now()
   257  		ar.logger.Trace("running post-run hooks", "start", start)
   258  		defer func() {
   259  			end := time.Now()
   260  			ar.logger.Trace("finished post-run hooks", "end", end, "duration", end.Sub(start))
   261  		}()
   262  	}
   263  
   264  	for _, hook := range ar.runnerHooks {
   265  		post, ok := hook.(interfaces.RunnerPostrunHook)
   266  		if !ok {
   267  			continue
   268  		}
   269  
   270  		name := post.Name()
   271  		var start time.Time
   272  		if ar.logger.IsTrace() {
   273  			start = time.Now()
   274  			ar.logger.Trace("running post-run hook", "name", name, "start", start)
   275  		}
   276  
   277  		if err := post.Postrun(); err != nil {
   278  			return fmt.Errorf("hook %q failed: %v", name, err)
   279  		}
   280  
   281  		if ar.logger.IsTrace() {
   282  			end := time.Now()
   283  			ar.logger.Trace("finished post-run hooks", "name", name, "end", end, "duration", end.Sub(start))
   284  		}
   285  	}
   286  
   287  	return nil
   288  }
   289  
   290  // destroy is used to run the runners destroy hooks. All hooks are run and
   291  // errors are returned as a multierror.
   292  func (ar *allocRunner) destroy() error {
   293  	if ar.logger.IsTrace() {
   294  		start := time.Now()
   295  		ar.logger.Trace("running destroy hooks", "start", start)
   296  		defer func() {
   297  			end := time.Now()
   298  			ar.logger.Trace("finished destroy hooks", "end", end, "duration", end.Sub(start))
   299  		}()
   300  	}
   301  
   302  	var merr multierror.Error
   303  	for _, hook := range ar.runnerHooks {
   304  		h, ok := hook.(interfaces.RunnerDestroyHook)
   305  		if !ok {
   306  			continue
   307  		}
   308  
   309  		name := h.Name()
   310  		var start time.Time
   311  		if ar.logger.IsTrace() {
   312  			start = time.Now()
   313  			ar.logger.Trace("running destroy hook", "name", name, "start", start)
   314  		}
   315  
   316  		if err := h.Destroy(); err != nil {
   317  			merr.Errors = append(merr.Errors, fmt.Errorf("destroy hook %q failed: %v", name, err))
   318  		}
   319  
   320  		if ar.logger.IsTrace() {
   321  			end := time.Now()
   322  			ar.logger.Trace("finished destroy hooks", "name", name, "end", end, "duration", end.Sub(start))
   323  		}
   324  	}
   325  
   326  	return merr.ErrorOrNil()
   327  }
   328  
   329  func (ar *allocRunner) preKillHooks() {
   330  	for _, hook := range ar.runnerHooks {
   331  		pre, ok := hook.(interfaces.RunnerPreKillHook)
   332  		if !ok {
   333  			continue
   334  		}
   335  
   336  		name := pre.Name()
   337  		var start time.Time
   338  		if ar.logger.IsTrace() {
   339  			start = time.Now()
   340  			ar.logger.Trace("running alloc pre shutdown hook", "name", name, "start", start)
   341  		}
   342  
   343  		pre.PreKill()
   344  
   345  		if ar.logger.IsTrace() {
   346  			end := time.Now()
   347  			ar.logger.Trace("finished alloc pre shutdown hook", "name", name, "end", end, "duration", end.Sub(start))
   348  		}
   349  	}
   350  }
   351  
   352  // shutdownHooks calls graceful shutdown hooks for when the agent is exiting.
   353  func (ar *allocRunner) shutdownHooks() {
   354  	for _, hook := range ar.runnerHooks {
   355  		sh, ok := hook.(interfaces.ShutdownHook)
   356  		if !ok {
   357  			continue
   358  		}
   359  
   360  		name := sh.Name()
   361  		var start time.Time
   362  		if ar.logger.IsTrace() {
   363  			start = time.Now()
   364  			ar.logger.Trace("running shutdown hook", "name", name, "start", start)
   365  		}
   366  
   367  		sh.Shutdown()
   368  
   369  		if ar.logger.IsTrace() {
   370  			end := time.Now()
   371  			ar.logger.Trace("finished shutdown hooks", "name", name, "end", end, "duration", end.Sub(start))
   372  		}
   373  	}
   374  }
   375  
   376  func (ar *allocRunner) taskRestartHooks() {
   377  	for _, hook := range ar.runnerHooks {
   378  		re, ok := hook.(interfaces.RunnerTaskRestartHook)
   379  		if !ok {
   380  			continue
   381  		}
   382  
   383  		name := re.Name()
   384  		var start time.Time
   385  		if ar.logger.IsTrace() {
   386  			start = time.Now()
   387  			ar.logger.Trace("running alloc task restart hook",
   388  				"name", name, "start", start)
   389  		}
   390  
   391  		re.PreTaskRestart()
   392  
   393  		if ar.logger.IsTrace() {
   394  			end := time.Now()
   395  			ar.logger.Trace("finished alloc task restart hook",
   396  				"name", name, "end", end, "duration", end.Sub(start))
   397  		}
   398  	}
   399  }