github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/alloc_runner_hooks.go

github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/alloc_runner_hooks.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	multierror "github.com/hashicorp/go-multierror"
     8  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
     9  	clientconfig "github.com/hashicorp/nomad/client/config"
    10  	cstructs "github.com/hashicorp/nomad/client/structs"
    11  	"github.com/hashicorp/nomad/client/taskenv"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  	"github.com/hashicorp/nomad/plugins/drivers"
    14  )
    15  
// hookResourceSetter is the accessor pair through which a hook reads and
// replaces the allocation's hook resources.
type hookResourceSetter interface {
	// GetAllocHookResources returns the hook resources currently stored.
	GetAllocHookResources() *cstructs.AllocHookResources
	// SetAllocHookResources replaces the stored hook resources.
	SetAllocHookResources(*cstructs.AllocHookResources)
}
    20  
// allocHookResourceSetter is a shim around the alloc runner that provides
// the hookResourceSetter methods, so a hook can get and set the hook
// resources without holding the alloc runner itself.
type allocHookResourceSetter struct {
	// ar is the alloc runner whose hookState is read and written.
	ar *allocRunner
}
    24  
    25  func (a *allocHookResourceSetter) GetAllocHookResources() *cstructs.AllocHookResources {
    26  	a.ar.hookStateMu.RLock()
    27  	defer a.ar.hookStateMu.RUnlock()
    28  
    29  	return a.ar.hookState
    30  }
    31  
    32  func (a *allocHookResourceSetter) SetAllocHookResources(res *cstructs.AllocHookResources) {
    33  	a.ar.hookStateMu.Lock()
    34  	defer a.ar.hookStateMu.Unlock()
    35  
    36  	a.ar.hookState = res
    37  
    38  	// Propagate to all of the TRs within the lock to ensure consistent state.
    39  	// TODO: Refactor so TR's pull state from AR?
    40  	for _, tr := range a.ar.tasks {
    41  		tr.SetAllocHookResources(res)
    42  	}
    43  }
    44  
// networkIsolationSetter is implemented by types that accept a network
// isolation spec.
type networkIsolationSetter interface {
	// SetNetworkIsolation hands over the network isolation spec to apply.
	SetNetworkIsolation(*drivers.NetworkIsolationSpec)
}
    48  
// allocNetworkIsolationSetter is a shim to allow the alloc network hook to
// set the alloc network isolation configuration without full access
// to the alloc runner.
type allocNetworkIsolationSetter struct {
	// ar is the alloc runner whose task runners receive the isolation spec.
	ar *allocRunner
}
    55  
    56  func (a *allocNetworkIsolationSetter) SetNetworkIsolation(n *drivers.NetworkIsolationSpec) {
    57  	for _, tr := range a.ar.tasks {
    58  		tr.SetNetworkIsolation(n)
    59  	}
    60  }
    61  
// allocHealthSetter is a shim to allow the alloc health watcher hook to set
// and clear the alloc health without full access to the alloc runner state.
type allocHealthSetter struct {
	// ar is the alloc runner whose deployment status is read and modified.
	ar *allocRunner
}
    67  
    68  // HasHealth returns true if a deployment status is already set.
    69  func (a *allocHealthSetter) HasHealth() bool {
    70  	a.ar.stateLock.Lock()
    71  	defer a.ar.stateLock.Unlock()
    72  	return a.ar.state.DeploymentStatus.HasHealth()
    73  }
    74  
    75  // ClearHealth allows the health watcher hook to clear the alloc's deployment
    76  // health if the deployment id changes. It does not update the server as the
    77  // status is only cleared when already receiving an update from the server.
    78  //
    79  // Only for use by health hook.
    80  func (a *allocHealthSetter) ClearHealth() {
    81  	a.ar.stateLock.Lock()
    82  	a.ar.state.ClearDeploymentStatus()
    83  	a.ar.persistDeploymentStatus(nil)
    84  	a.ar.stateLock.Unlock()
    85  }
    86  
    87  // SetHealth allows the health watcher hook to set the alloc's
    88  // deployment/migration health and emit task events.
    89  //
    90  // Only for use by health hook.
    91  func (a *allocHealthSetter) SetHealth(healthy, isDeploy bool, trackerTaskEvents map[string]*structs.TaskEvent) {
    92  	// Updating alloc deployment state is tricky because it may be nil, but
    93  	// if it's not then we need to maintain the values of Canary and
    94  	// ModifyIndex as they're only mutated by the server.
    95  	a.ar.stateLock.Lock()
    96  	a.ar.state.SetDeploymentStatus(time.Now(), healthy)
    97  	a.ar.persistDeploymentStatus(a.ar.state.DeploymentStatus)
    98  	terminalDesiredState := a.ar.Alloc().ServerTerminalStatus()
    99  	a.ar.stateLock.Unlock()
   100  
   101  	// If deployment is unhealthy emit task events explaining why
   102  	if !healthy && isDeploy && !terminalDesiredState {
   103  		for task, event := range trackerTaskEvents {
   104  			if tr, ok := a.ar.tasks[task]; ok {
   105  				// Append but don't emit event since the server
   106  				// will be updated below
   107  				tr.AppendEvent(event)
   108  			}
   109  		}
   110  	}
   111  
   112  	// Gather the state of the other tasks
   113  	states := make(map[string]*structs.TaskState, len(a.ar.tasks))
   114  	for name, tr := range a.ar.tasks {
   115  		states[name] = tr.TaskState()
   116  	}
   117  
   118  	// Build the client allocation
   119  	calloc := a.ar.clientAlloc(states)
   120  
   121  	// Update the server
   122  	a.ar.stateUpdater.AllocStateUpdated(calloc)
   123  
   124  	// Broadcast client alloc to listeners
   125  	a.ar.allocBroadcaster.Send(calloc)
   126  }
   127  
   128  // initRunnerHooks initializes the runners hooks.
   129  func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
   130  	hookLogger := ar.logger.Named("runner_hook")
   131  
   132  	// create health setting shim
   133  	hs := &allocHealthSetter{ar}
   134  
   135  	// create network isolation setting shim
   136  	ns := &allocNetworkIsolationSetter{ar: ar}
   137  
   138  	// create hook resource setting shim
   139  	hrs := &allocHookResourceSetter{ar: ar}
   140  	hrs.SetAllocHookResources(&cstructs.AllocHookResources{})
   141  
   142  	// build the network manager
   143  	nm, err := newNetworkManager(ar.Alloc(), ar.driverManager)
   144  	if err != nil {
   145  		return fmt.Errorf("failed to configure network manager: %v", err)
   146  	}
   147  
   148  	// create network configurator
   149  	nc, err := newNetworkConfigurator(hookLogger, ar.Alloc(), config)
   150  	if err != nil {
   151  		return fmt.Errorf("failed to initialize network configurator: %v", err)
   152  	}
   153  
   154  	// Create the alloc directory hook. This is run first to ensure the
   155  	// directory path exists for other hooks.
   156  	alloc := ar.Alloc()
   157  	ar.runnerHooks = []interfaces.RunnerHook{
   158  		newAllocDirHook(hookLogger, ar.allocDir),
   159  		newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher),
   160  		newDiskMigrationHook(hookLogger, ar.prevAllocMigrator, ar.allocDir),
   161  		newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient),
   162  		newNetworkHook(hookLogger, ns, alloc, nm, nc),
   163  		newGroupServiceHook(groupServiceHookConfig{
   164  			alloc:          alloc,
   165  			consul:         ar.consulClient,
   166  			restarter:      ar,
   167  			taskEnvBuilder: taskenv.NewBuilder(config.Node, ar.Alloc(), nil, config.Region).SetAllocDir(ar.allocDir.AllocDir),
   168  			logger:         hookLogger,
   169  		}),
   170  		newConsulSockHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig),
   171  		newCSIHook(hookLogger, alloc, ar.rpcClient, ar.csiManager, hrs),
   172  	}
   173  
   174  	return nil
   175  }
   176  
   177  // prerun is used to run the runners prerun hooks.
   178  func (ar *allocRunner) prerun() error {
   179  	if ar.logger.IsTrace() {
   180  		start := time.Now()
   181  		ar.logger.Trace("running pre-run hooks", "start", start)
   182  		defer func() {
   183  			end := time.Now()
   184  			ar.logger.Trace("finished pre-run hooks", "end", end, "duration", end.Sub(start))
   185  		}()
   186  	}
   187  
   188  	for _, hook := range ar.runnerHooks {
   189  		pre, ok := hook.(interfaces.RunnerPrerunHook)
   190  		if !ok {
   191  			continue
   192  		}
   193  
   194  		name := pre.Name()
   195  		var start time.Time
   196  		if ar.logger.IsTrace() {
   197  			start = time.Now()
   198  			ar.logger.Trace("running pre-run hook", "name", name, "start", start)
   199  		}
   200  
   201  		if err := pre.Prerun(); err != nil {
   202  			return fmt.Errorf("pre-run hook %q failed: %v", name, err)
   203  		}
   204  
   205  		if ar.logger.IsTrace() {
   206  			end := time.Now()
   207  			ar.logger.Trace("finished pre-run hook", "name", name, "end", end, "duration", end.Sub(start))
   208  		}
   209  	}
   210  
   211  	return nil
   212  }
   213  
   214  // update runs the alloc runner update hooks. Update hooks are run
   215  // asynchronously with all other alloc runner operations.
   216  func (ar *allocRunner) update(update *structs.Allocation) error {
   217  	if ar.logger.IsTrace() {
   218  		start := time.Now()
   219  		ar.logger.Trace("running update hooks", "start", start)
   220  		defer func() {
   221  			end := time.Now()
   222  			ar.logger.Trace("finished update hooks", "end", end, "duration", end.Sub(start))
   223  		}()
   224  	}
   225  
   226  	req := &interfaces.RunnerUpdateRequest{
   227  		Alloc: update,
   228  	}
   229  
   230  	var merr multierror.Error
   231  	for _, hook := range ar.runnerHooks {
   232  		h, ok := hook.(interfaces.RunnerUpdateHook)
   233  		if !ok {
   234  			continue
   235  		}
   236  
   237  		name := h.Name()
   238  		var start time.Time
   239  		if ar.logger.IsTrace() {
   240  			start = time.Now()
   241  			ar.logger.Trace("running update hook", "name", name, "start", start)
   242  		}
   243  
   244  		if err := h.Update(req); err != nil {
   245  			merr.Errors = append(merr.Errors, fmt.Errorf("update hook %q failed: %v", name, err))
   246  		}
   247  
   248  		if ar.logger.IsTrace() {
   249  			end := time.Now()
   250  			ar.logger.Trace("finished update hooks", "name", name, "end", end, "duration", end.Sub(start))
   251  		}
   252  	}
   253  
   254  	return merr.ErrorOrNil()
   255  }
   256  
   257  // postrun is used to run the runners postrun hooks.
   258  func (ar *allocRunner) postrun() error {
   259  	if ar.logger.IsTrace() {
   260  		start := time.Now()
   261  		ar.logger.Trace("running post-run hooks", "start", start)
   262  		defer func() {
   263  			end := time.Now()
   264  			ar.logger.Trace("finished post-run hooks", "end", end, "duration", end.Sub(start))
   265  		}()
   266  	}
   267  
   268  	for _, hook := range ar.runnerHooks {
   269  		post, ok := hook.(interfaces.RunnerPostrunHook)
   270  		if !ok {
   271  			continue
   272  		}
   273  
   274  		name := post.Name()
   275  		var start time.Time
   276  		if ar.logger.IsTrace() {
   277  			start = time.Now()
   278  			ar.logger.Trace("running post-run hook", "name", name, "start", start)
   279  		}
   280  
   281  		if err := post.Postrun(); err != nil {
   282  			return fmt.Errorf("hook %q failed: %v", name, err)
   283  		}
   284  
   285  		if ar.logger.IsTrace() {
   286  			end := time.Now()
   287  			ar.logger.Trace("finished post-run hooks", "name", name, "end", end, "duration", end.Sub(start))
   288  		}
   289  	}
   290  
   291  	return nil
   292  }
   293  
   294  // destroy is used to run the runners destroy hooks. All hooks are run and
   295  // errors are returned as a multierror.
   296  func (ar *allocRunner) destroy() error {
   297  	if ar.logger.IsTrace() {
   298  		start := time.Now()
   299  		ar.logger.Trace("running destroy hooks", "start", start)
   300  		defer func() {
   301  			end := time.Now()
   302  			ar.logger.Trace("finished destroy hooks", "end", end, "duration", end.Sub(start))
   303  		}()
   304  	}
   305  
   306  	var merr multierror.Error
   307  	for _, hook := range ar.runnerHooks {
   308  		h, ok := hook.(interfaces.RunnerDestroyHook)
   309  		if !ok {
   310  			continue
   311  		}
   312  
   313  		name := h.Name()
   314  		var start time.Time
   315  		if ar.logger.IsTrace() {
   316  			start = time.Now()
   317  			ar.logger.Trace("running destroy hook", "name", name, "start", start)
   318  		}
   319  
   320  		if err := h.Destroy(); err != nil {
   321  			merr.Errors = append(merr.Errors, fmt.Errorf("destroy hook %q failed: %v", name, err))
   322  		}
   323  
   324  		if ar.logger.IsTrace() {
   325  			end := time.Now()
   326  			ar.logger.Trace("finished destroy hooks", "name", name, "end", end, "duration", end.Sub(start))
   327  		}
   328  	}
   329  
   330  	return merr.ErrorOrNil()
   331  }
   332  
   333  func (ar *allocRunner) preKillHooks() {
   334  	for _, hook := range ar.runnerHooks {
   335  		pre, ok := hook.(interfaces.RunnerPreKillHook)
   336  		if !ok {
   337  			continue
   338  		}
   339  
   340  		name := pre.Name()
   341  		var start time.Time
   342  		if ar.logger.IsTrace() {
   343  			start = time.Now()
   344  			ar.logger.Trace("running alloc pre shutdown hook", "name", name, "start", start)
   345  		}
   346  
   347  		pre.PreKill()
   348  
   349  		if ar.logger.IsTrace() {
   350  			end := time.Now()
   351  			ar.logger.Trace("finished alloc pre shutdown hook", "name", name, "end", end, "duration", end.Sub(start))
   352  		}
   353  	}
   354  }
   355  
   356  // shutdownHooks calls graceful shutdown hooks for when the agent is exiting.
   357  func (ar *allocRunner) shutdownHooks() {
   358  	for _, hook := range ar.runnerHooks {
   359  		sh, ok := hook.(interfaces.ShutdownHook)
   360  		if !ok {
   361  			continue
   362  		}
   363  
   364  		name := sh.Name()
   365  		var start time.Time
   366  		if ar.logger.IsTrace() {
   367  			start = time.Now()
   368  			ar.logger.Trace("running shutdown hook", "name", name, "start", start)
   369  		}
   370  
   371  		sh.Shutdown()
   372  
   373  		if ar.logger.IsTrace() {
   374  			end := time.Now()
   375  			ar.logger.Trace("finished shutdown hooks", "name", name, "end", end, "duration", end.Sub(start))
   376  		}
   377  	}
   378  }