github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/alloc_runner_hooks.go

github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/alloc_runner_hooks.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	multierror "github.com/hashicorp/go-multierror"
     8  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
     9  	clientconfig "github.com/hashicorp/nomad/client/config"
    10  	cstructs "github.com/hashicorp/nomad/client/structs"
    11  	"github.com/hashicorp/nomad/client/taskenv"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  )
    14  
// hookResourceSetter describes a value that can hand out and replace a set of
// AllocHookResources.
type hookResourceSetter interface {
	// GetAllocHookResources returns the currently stored hook resources.
	GetAllocHookResources() *cstructs.AllocHookResources
	// SetAllocHookResources replaces the stored hook resources.
	SetAllocHookResources(*cstructs.AllocHookResources)
}
    19  
// allocHookResourceSetter is a shim that exposes the alloc runner's hook
// resources through the hookResourceSetter interface without handing out the
// whole alloc runner.
type allocHookResourceSetter struct {
	// ar is the alloc runner whose hookState is read and written.
	ar *allocRunner
}
    23  
    24  func (a *allocHookResourceSetter) GetAllocHookResources() *cstructs.AllocHookResources {
    25  	a.ar.hookStateMu.RLock()
    26  	defer a.ar.hookStateMu.RUnlock()
    27  
    28  	return a.ar.hookState
    29  }
    30  
    31  func (a *allocHookResourceSetter) SetAllocHookResources(res *cstructs.AllocHookResources) {
    32  	a.ar.hookStateMu.Lock()
    33  	defer a.ar.hookStateMu.Unlock()
    34  
    35  	a.ar.hookState = res
    36  
    37  	// Propagate to all of the TRs within the lock to ensure consistent state.
    38  	// TODO: Refactor so TR's pull state from AR?
    39  	for _, tr := range a.ar.tasks {
    40  		tr.SetAllocHookResources(res)
    41  	}
    42  }
    43  
// allocHealthSetter is a shim to allow the alloc health watcher hook to set
// and clear the alloc health without full access to the alloc runner state
type allocHealthSetter struct {
	// ar is the alloc runner whose deployment status is read and mutated.
	ar *allocRunner
}
    49  
    50  // HasHealth returns true if a deployment status is already set.
    51  func (a *allocHealthSetter) HasHealth() bool {
    52  	a.ar.stateLock.Lock()
    53  	defer a.ar.stateLock.Unlock()
    54  	return a.ar.state.DeploymentStatus.HasHealth()
    55  }
    56  
    57  // ClearHealth allows the health watcher hook to clear the alloc's deployment
    58  // health if the deployment id changes. It does not update the server as the
    59  // status is only cleared when already receiving an update from the server.
    60  //
    61  // Only for use by health hook.
    62  func (a *allocHealthSetter) ClearHealth() {
    63  	a.ar.stateLock.Lock()
    64  	a.ar.state.ClearDeploymentStatus()
    65  	a.ar.persistDeploymentStatus(nil)
    66  	a.ar.stateLock.Unlock()
    67  }
    68  
    69  // SetHealth allows the health watcher hook to set the alloc's
    70  // deployment/migration health and emit task events.
    71  //
    72  // Only for use by health hook.
    73  func (a *allocHealthSetter) SetHealth(healthy, isDeploy bool, trackerTaskEvents map[string]*structs.TaskEvent) {
    74  	// Updating alloc deployment state is tricky because it may be nil, but
    75  	// if it's not then we need to maintain the values of Canary and
    76  	// ModifyIndex as they're only mutated by the server.
    77  	a.ar.stateLock.Lock()
    78  	a.ar.state.SetDeploymentStatus(time.Now(), healthy)
    79  	a.ar.persistDeploymentStatus(a.ar.state.DeploymentStatus)
    80  	terminalDesiredState := a.ar.Alloc().ServerTerminalStatus()
    81  	a.ar.stateLock.Unlock()
    82  
    83  	// If deployment is unhealthy emit task events explaining why
    84  	if !healthy && isDeploy && !terminalDesiredState {
    85  		for task, event := range trackerTaskEvents {
    86  			if tr, ok := a.ar.tasks[task]; ok {
    87  				// Append but don't emit event since the server
    88  				// will be updated below
    89  				tr.AppendEvent(event)
    90  			}
    91  		}
    92  	}
    93  
    94  	// Gather the state of the other tasks
    95  	states := make(map[string]*structs.TaskState, len(a.ar.tasks))
    96  	for name, tr := range a.ar.tasks {
    97  		states[name] = tr.TaskState()
    98  	}
    99  
   100  	// Build the client allocation
   101  	calloc := a.ar.clientAlloc(states)
   102  
   103  	// Update the server
   104  	a.ar.stateUpdater.AllocStateUpdated(calloc)
   105  
   106  	// Broadcast client alloc to listeners
   107  	a.ar.allocBroadcaster.Send(calloc)
   108  }
   109  
   110  // initRunnerHooks initializes the runners hooks.
   111  func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
   112  	hookLogger := ar.logger.Named("runner_hook")
   113  
   114  	// create health setting shim
   115  	hs := &allocHealthSetter{ar}
   116  
   117  	// create network isolation setting shim
   118  	ns := &allocNetworkIsolationSetter{ar: ar}
   119  
   120  	// create hook resource setting shim
   121  	hrs := &allocHookResourceSetter{ar: ar}
   122  	hrs.SetAllocHookResources(&cstructs.AllocHookResources{})
   123  
   124  	// build the network manager
   125  	nm, err := newNetworkManager(ar.Alloc(), ar.driverManager)
   126  	if err != nil {
   127  		return fmt.Errorf("failed to configure network manager: %v", err)
   128  	}
   129  
   130  	// create network configurator
   131  	nc, err := newNetworkConfigurator(hookLogger, ar.Alloc(), config)
   132  	if err != nil {
   133  		return fmt.Errorf("failed to initialize network configurator: %v", err)
   134  	}
   135  
   136  	// Create the alloc directory hook. This is run first to ensure the
   137  	// directory path exists for other hooks.
   138  	alloc := ar.Alloc()
   139  	ar.runnerHooks = []interfaces.RunnerHook{
   140  		newAllocDirHook(hookLogger, ar.allocDir),
   141  		newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher),
   142  		newDiskMigrationHook(hookLogger, ar.prevAllocMigrator, ar.allocDir),
   143  		newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient),
   144  		newNetworkHook(hookLogger, ns, alloc, nm, nc, ar),
   145  		newGroupServiceHook(groupServiceHookConfig{
   146  			alloc:               alloc,
   147  			consul:              ar.consulClient,
   148  			restarter:           ar,
   149  			taskEnvBuilder:      taskenv.NewBuilder(config.Node, ar.Alloc(), nil, config.Region).SetAllocDir(ar.allocDir.AllocDir),
   150  			networkStatusGetter: ar,
   151  			logger:              hookLogger,
   152  		}),
   153  		newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
   154  		newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
   155  		newCSIHook(ar, hookLogger, alloc, ar.rpcClient, ar.csiManager, hrs),
   156  	}
   157  
   158  	return nil
   159  }
   160  
   161  // prerun is used to run the runners prerun hooks.
   162  func (ar *allocRunner) prerun() error {
   163  	if ar.logger.IsTrace() {
   164  		start := time.Now()
   165  		ar.logger.Trace("running pre-run hooks", "start", start)
   166  		defer func() {
   167  			end := time.Now()
   168  			ar.logger.Trace("finished pre-run hooks", "end", end, "duration", end.Sub(start))
   169  		}()
   170  	}
   171  
   172  	for _, hook := range ar.runnerHooks {
   173  		pre, ok := hook.(interfaces.RunnerPrerunHook)
   174  		if !ok {
   175  			continue
   176  		}
   177  
   178  		name := pre.Name()
   179  		var start time.Time
   180  		if ar.logger.IsTrace() {
   181  			start = time.Now()
   182  			ar.logger.Trace("running pre-run hook", "name", name, "start", start)
   183  		}
   184  
   185  		if err := pre.Prerun(); err != nil {
   186  			return fmt.Errorf("pre-run hook %q failed: %v", name, err)
   187  		}
   188  
   189  		if ar.logger.IsTrace() {
   190  			end := time.Now()
   191  			ar.logger.Trace("finished pre-run hook", "name", name, "end", end, "duration", end.Sub(start))
   192  		}
   193  	}
   194  
   195  	return nil
   196  }
   197  
   198  // update runs the alloc runner update hooks. Update hooks are run
   199  // asynchronously with all other alloc runner operations.
   200  func (ar *allocRunner) update(update *structs.Allocation) error {
   201  	if ar.logger.IsTrace() {
   202  		start := time.Now()
   203  		ar.logger.Trace("running update hooks", "start", start)
   204  		defer func() {
   205  			end := time.Now()
   206  			ar.logger.Trace("finished update hooks", "end", end, "duration", end.Sub(start))
   207  		}()
   208  	}
   209  
   210  	req := &interfaces.RunnerUpdateRequest{
   211  		Alloc: update,
   212  	}
   213  
   214  	var merr multierror.Error
   215  	for _, hook := range ar.runnerHooks {
   216  		h, ok := hook.(interfaces.RunnerUpdateHook)
   217  		if !ok {
   218  			continue
   219  		}
   220  
   221  		name := h.Name()
   222  		var start time.Time
   223  		if ar.logger.IsTrace() {
   224  			start = time.Now()
   225  			ar.logger.Trace("running update hook", "name", name, "start", start)
   226  		}
   227  
   228  		if err := h.Update(req); err != nil {
   229  			merr.Errors = append(merr.Errors, fmt.Errorf("update hook %q failed: %v", name, err))
   230  		}
   231  
   232  		if ar.logger.IsTrace() {
   233  			end := time.Now()
   234  			ar.logger.Trace("finished update hooks", "name", name, "end", end, "duration", end.Sub(start))
   235  		}
   236  	}
   237  
   238  	return merr.ErrorOrNil()
   239  }
   240  
   241  // postrun is used to run the runners postrun hooks.
   242  func (ar *allocRunner) postrun() error {
   243  	if ar.logger.IsTrace() {
   244  		start := time.Now()
   245  		ar.logger.Trace("running post-run hooks", "start", start)
   246  		defer func() {
   247  			end := time.Now()
   248  			ar.logger.Trace("finished post-run hooks", "end", end, "duration", end.Sub(start))
   249  		}()
   250  	}
   251  
   252  	for _, hook := range ar.runnerHooks {
   253  		post, ok := hook.(interfaces.RunnerPostrunHook)
   254  		if !ok {
   255  			continue
   256  		}
   257  
   258  		name := post.Name()
   259  		var start time.Time
   260  		if ar.logger.IsTrace() {
   261  			start = time.Now()
   262  			ar.logger.Trace("running post-run hook", "name", name, "start", start)
   263  		}
   264  
   265  		if err := post.Postrun(); err != nil {
   266  			return fmt.Errorf("hook %q failed: %v", name, err)
   267  		}
   268  
   269  		if ar.logger.IsTrace() {
   270  			end := time.Now()
   271  			ar.logger.Trace("finished post-run hooks", "name", name, "end", end, "duration", end.Sub(start))
   272  		}
   273  	}
   274  
   275  	return nil
   276  }
   277  
   278  // destroy is used to run the runners destroy hooks. All hooks are run and
   279  // errors are returned as a multierror.
   280  func (ar *allocRunner) destroy() error {
   281  	if ar.logger.IsTrace() {
   282  		start := time.Now()
   283  		ar.logger.Trace("running destroy hooks", "start", start)
   284  		defer func() {
   285  			end := time.Now()
   286  			ar.logger.Trace("finished destroy hooks", "end", end, "duration", end.Sub(start))
   287  		}()
   288  	}
   289  
   290  	var merr multierror.Error
   291  	for _, hook := range ar.runnerHooks {
   292  		h, ok := hook.(interfaces.RunnerDestroyHook)
   293  		if !ok {
   294  			continue
   295  		}
   296  
   297  		name := h.Name()
   298  		var start time.Time
   299  		if ar.logger.IsTrace() {
   300  			start = time.Now()
   301  			ar.logger.Trace("running destroy hook", "name", name, "start", start)
   302  		}
   303  
   304  		if err := h.Destroy(); err != nil {
   305  			merr.Errors = append(merr.Errors, fmt.Errorf("destroy hook %q failed: %v", name, err))
   306  		}
   307  
   308  		if ar.logger.IsTrace() {
   309  			end := time.Now()
   310  			ar.logger.Trace("finished destroy hooks", "name", name, "end", end, "duration", end.Sub(start))
   311  		}
   312  	}
   313  
   314  	return merr.ErrorOrNil()
   315  }
   316  
   317  func (ar *allocRunner) preKillHooks() {
   318  	for _, hook := range ar.runnerHooks {
   319  		pre, ok := hook.(interfaces.RunnerPreKillHook)
   320  		if !ok {
   321  			continue
   322  		}
   323  
   324  		name := pre.Name()
   325  		var start time.Time
   326  		if ar.logger.IsTrace() {
   327  			start = time.Now()
   328  			ar.logger.Trace("running alloc pre shutdown hook", "name", name, "start", start)
   329  		}
   330  
   331  		pre.PreKill()
   332  
   333  		if ar.logger.IsTrace() {
   334  			end := time.Now()
   335  			ar.logger.Trace("finished alloc pre shutdown hook", "name", name, "end", end, "duration", end.Sub(start))
   336  		}
   337  	}
   338  }
   339  
   340  // shutdownHooks calls graceful shutdown hooks for when the agent is exiting.
   341  func (ar *allocRunner) shutdownHooks() {
   342  	for _, hook := range ar.runnerHooks {
   343  		sh, ok := hook.(interfaces.ShutdownHook)
   344  		if !ok {
   345  			continue
   346  		}
   347  
   348  		name := sh.Name()
   349  		var start time.Time
   350  		if ar.logger.IsTrace() {
   351  			start = time.Now()
   352  			ar.logger.Trace("running shutdown hook", "name", name, "start", start)
   353  		}
   354  
   355  		sh.Shutdown()
   356  
   357  		if ar.logger.IsTrace() {
   358  			end := time.Now()
   359  			ar.logger.Trace("finished shutdown hooks", "name", name, "end", end, "duration", end.Sub(start))
   360  		}
   361  	}
   362  }
   363  
   364  func (ar *allocRunner) taskRestartHooks() {
   365  	for _, hook := range ar.runnerHooks {
   366  		re, ok := hook.(interfaces.RunnerTaskRestartHook)
   367  		if !ok {
   368  			continue
   369  		}
   370  
   371  		name := re.Name()
   372  		var start time.Time
   373  		if ar.logger.IsTrace() {
   374  			start = time.Now()
   375  			ar.logger.Trace("running alloc task restart hook",
   376  				"name", name, "start", start)
   377  		}
   378  
   379  		re.PreTaskRestart()
   380  
   381  		if ar.logger.IsTrace() {
   382  			end := time.Now()
   383  			ar.logger.Trace("finished alloc task restart hook",
   384  				"name", name, "end", end, "duration", end.Sub(start))
   385  		}
   386  	}
   387  }