github.com/bigcommerce/nomad@v0.9.3-bc/drivers/exec/driver.go (about)

     1  package exec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/hashicorp/consul-template/signals"
    13  	hclog "github.com/hashicorp/go-hclog"
    14  	"github.com/hashicorp/nomad/client/fingerprint"
    15  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    16  	"github.com/hashicorp/nomad/drivers/shared/executor"
    17  	"github.com/hashicorp/nomad/helper"
    18  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    19  	"github.com/hashicorp/nomad/plugins/base"
    20  	"github.com/hashicorp/nomad/plugins/drivers"
    21  	"github.com/hashicorp/nomad/plugins/drivers/utils"
    22  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    23  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    24  )
    25  
    26  const (
    27  	// pluginName is the name this driver registers under and reports in pluginInfo
    28  	pluginName = "exec"
    29  
    30  	// fingerprintPeriod is the delay between successive fingerprint responses
    31  	fingerprintPeriod = 30 * time.Second
    32  
    33  	// taskHandleVersion is the version stamped on task handles created by
    34  	// StartTask; RecoverTask treats version 0 handles as pre-0.9 state
    35  	taskHandleVersion = 1
    36  )
    37  
    38  var (
    39  	// PluginID is the exec plugin metadata registered in the plugin
    40  	// catalog: the plugin name paired with the driver plugin type.
    41  	PluginID = loader.PluginID{
    42  		Name:       pluginName,
    43  		PluginType: base.PluginTypeDriver,
    44  	}
    45  
    46  	// PluginConfig is the exec driver factory function registered in the
    47  	// plugin catalog. The factory wraps NewExecDriver; Config starts empty.
    48  	PluginConfig = &loader.InternalPluginConfig{
    49  		Config:  map[string]interface{}{},
    50  		Factory: func(l hclog.Logger) interface{} { return NewExecDriver(l) },
    51  	}
    52  
    53  	// pluginInfo is the static response returned for the PluginInfo RPC
    54  	pluginInfo = &base.PluginInfoResponse{
    55  		Type:              base.PluginTypeDriver,
    56  		PluginApiVersions: []string{drivers.ApiVersion010},
    57  		PluginVersion:     "0.1.0",
    58  		Name:              pluginName,
    59  	}
    60  
    61  	// configSpec is the hcl specification returned by the ConfigSchema RPC; it declares no attributes
    62  	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{})
    63  
    64  	// taskConfigSpec is the hcl specification for the driver config section of
    65  	// a task within a job. It is returned in the TaskConfigSchema RPC
    66  	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    67  		"command": hclspec.NewAttr("command", "string", true), // presumably the final bool marks the attribute required - confirm against hclspec
    68  		"args":    hclspec.NewAttr("args", "list(string)", false),
    69  	})
    70  
    71  	// capabilities is returned by the Capabilities RPC and indicates what
    72  	// optional features this driver supports
    73  	capabilities = &drivers.Capabilities{
    74  		SendSignals: true,
    75  		Exec:        true,
    76  		FSIsolation: drivers.FSIsolationChroot,
    77  	}
    78  )
    79  
    80  // Driver fork/execs tasks using many of the underlying OS's isolation
    81  // features where configured. Instances are built by NewExecDriver.
    82  type Driver struct {
    83  	// eventer is used to handle multiplexing of TaskEvents calls such that an
    84  	// event can be broadcast to all callers
    85  	eventer *eventer.Eventer
    86  
    87  	// nomadConfig is the client config from nomad; it is nil until SetConfig supplies it
    88  	nomadConfig *base.ClientDriverConfig
    89  
    90  	// tasks is the in memory datastore mapping taskIDs to driverHandles
    91  	tasks *taskStore
    92  
    93  	// ctx is the context for the driver. It is passed to other subsystems to
    94  	// coordinate shutdown
    95  	ctx context.Context
    96  
    97  	// signalShutdown is called when the driver is shutting down and cancels the
    98  	// ctx passed to any subsystems
    99  	signalShutdown context.CancelFunc
   100  
   101  	// logger will log to the Nomad agent
   102  	logger hclog.Logger
   103  
   104  	// fingerprintSuccess is a tri-state boolean: nil until the first fingerprint
   105  	// completes, then true or false. It is read and written under fingerprintLock.
   106  	fingerprintSuccess *bool
   107  	fingerprintLock    sync.Mutex
   108  }
   109  
   110  // TaskConfig is the driver configuration of a task within a job, decoded by StartTask
   111  type TaskConfig struct {
   112  	Command string   `codec:"command"` // passed to the executor as ExecCommand.Cmd
   113  	Args    []string `codec:"args"` // passed to the executor as ExecCommand.Args
   114  }
   115  
   116  // TaskState is the state which is encoded in the handle returned in
   117  // StartTask. This information is needed to rebuild the task state and handler
   118  // during recovery (see RecoverTask).
   119  type TaskState struct {
   120  	ReattachConfig *pstructs.ReattachConfig // used to reattach to the executor plugin process
   121  	TaskConfig     *drivers.TaskConfig // the task config that StartTask was called with
   122  	Pid            int // pid reported by the executor when the command was launched
   123  	StartedAt      time.Time // start time recorded by StartTask, rounded to the millisecond
   124  }
   125  
   126  // NewExecDriver returns a new DrivePlugin implementation
   127  func NewExecDriver(logger hclog.Logger) drivers.DriverPlugin {
   128  	ctx, cancel := context.WithCancel(context.Background())
   129  	logger = logger.Named(pluginName)
   130  	return &Driver{
   131  		eventer:        eventer.NewEventer(ctx, logger),
   132  		tasks:          newTaskStore(),
   133  		ctx:            ctx,
   134  		signalShutdown: cancel,
   135  		logger:         logger,
   136  	}
   137  }
   138  
   139  // setFingerprintSuccess marks the driver as having fingerprinted successfully
   140  func (d *Driver) setFingerprintSuccess() {
   141  	d.fingerprintLock.Lock()
   142  	d.fingerprintSuccess = helper.BoolToPtr(true)
   143  	d.fingerprintLock.Unlock()
   144  }
   145  
   146  // setFingerprintFailure marks the driver as having failed fingerprinting
   147  func (d *Driver) setFingerprintFailure() {
   148  	d.fingerprintLock.Lock()
   149  	d.fingerprintSuccess = helper.BoolToPtr(false)
   150  	d.fingerprintLock.Unlock()
   151  }
   152  
   153  // fingerprintSuccessful returns true if the driver has
   154  // never fingerprinted or has successfully fingerprinted
   155  func (d *Driver) fingerprintSuccessful() bool {
   156  	d.fingerprintLock.Lock()
   157  	defer d.fingerprintLock.Unlock()
   158  	return d.fingerprintSuccess == nil || *d.fingerprintSuccess
   159  }
   160  
   161  func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
   162  	return pluginInfo, nil
   163  }
   164  
   165  func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
   166  	return configSpec, nil
   167  }
   168  
   169  func (d *Driver) SetConfig(cfg *base.Config) error {
   170  	if cfg != nil && cfg.AgentConfig != nil {
   171  		d.nomadConfig = cfg.AgentConfig.Driver
   172  	}
   173  	return nil
   174  }
   175  
   176  func (d *Driver) Shutdown() {
   177  	d.signalShutdown()
   178  }
   179  
   180  func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
   181  	return taskConfigSpec, nil
   182  }
   183  
   184  func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
   185  	return capabilities, nil
   186  }
   187  
   188  func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
   189  	ch := make(chan *drivers.Fingerprint)
   190  	go d.handleFingerprint(ctx, ch)
   191  	return ch, nil
   192  
   193  }
   194  func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
   195  	defer close(ch)
   196  	ticker := time.NewTimer(0)
   197  	for {
   198  		select {
   199  		case <-ctx.Done():
   200  			return
   201  		case <-d.ctx.Done():
   202  			return
   203  		case <-ticker.C:
   204  			ticker.Reset(fingerprintPeriod)
   205  			ch <- d.buildFingerprint()
   206  		}
   207  	}
   208  }
   209  
   210  func (d *Driver) buildFingerprint() *drivers.Fingerprint {
   211  	if runtime.GOOS != "linux" {
   212  		d.setFingerprintFailure()
   213  		return &drivers.Fingerprint{
   214  			Health:            drivers.HealthStateUndetected,
   215  			HealthDescription: "exec driver unsupported on client OS",
   216  		}
   217  	}
   218  
   219  	fp := &drivers.Fingerprint{
   220  		Attributes:        map[string]*pstructs.Attribute{},
   221  		Health:            drivers.HealthStateHealthy,
   222  		HealthDescription: drivers.DriverHealthy,
   223  	}
   224  
   225  	if !utils.IsUnixRoot() {
   226  		fp.Health = drivers.HealthStateUndetected
   227  		fp.HealthDescription = drivers.DriverRequiresRootMessage
   228  		d.setFingerprintFailure()
   229  		return fp
   230  	}
   231  
   232  	mount, err := fingerprint.FindCgroupMountpointDir()
   233  	if err != nil {
   234  		fp.Health = drivers.HealthStateUnhealthy
   235  		fp.HealthDescription = drivers.NoCgroupMountMessage
   236  		if d.fingerprintSuccessful() {
   237  			d.logger.Warn(fp.HealthDescription, "error", err)
   238  		}
   239  		d.setFingerprintFailure()
   240  		return fp
   241  	}
   242  
   243  	if mount == "" {
   244  		fp.Health = drivers.HealthStateUnhealthy
   245  		fp.HealthDescription = drivers.CgroupMountEmpty
   246  		d.setFingerprintFailure()
   247  		return fp
   248  	}
   249  
   250  	fp.Attributes["driver.exec"] = pstructs.NewBoolAttribute(true)
   251  	d.setFingerprintSuccess()
   252  	return fp
   253  }
   254  
   255  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   256  	if handle == nil {
   257  		return fmt.Errorf("handle cannot be nil")
   258  	}
   259  
   260  	// COMPAT(0.10): pre 0.9 upgrade path check
   261  	if handle.Version == 0 {
   262  		return d.recoverPre09Task(handle)
   263  	}
   264  
   265  	// If already attached to handle there's nothing to recover.
   266  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   267  		d.logger.Trace("nothing to recover; task already exists",
   268  			"task_id", handle.Config.ID,
   269  			"task_name", handle.Config.Name,
   270  		)
   271  		return nil
   272  	}
   273  
   274  	// Handle doesn't already exist, try to reattach
   275  	var taskState TaskState
   276  	if err := handle.GetDriverState(&taskState); err != nil {
   277  		d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID)
   278  		return fmt.Errorf("failed to decode task state from handle: %v", err)
   279  	}
   280  
   281  	// Create client for reattached executor
   282  	plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
   283  	if err != nil {
   284  		d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
   285  		return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
   286  	}
   287  
   288  	exec, pluginClient, err := executor.ReattachToExecutor(plugRC,
   289  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
   290  	if err != nil {
   291  		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
   292  		return fmt.Errorf("failed to reattach to executor: %v", err)
   293  	}
   294  
   295  	h := &taskHandle{
   296  		exec:         exec,
   297  		pid:          taskState.Pid,
   298  		pluginClient: pluginClient,
   299  		taskConfig:   taskState.TaskConfig,
   300  		procState:    drivers.TaskStateRunning,
   301  		startedAt:    taskState.StartedAt,
   302  		exitResult:   &drivers.ExitResult{},
   303  	}
   304  
   305  	d.tasks.Set(taskState.TaskConfig.ID, h)
   306  
   307  	go h.run()
   308  	return nil
   309  }
   310  
   311  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   312  	if _, ok := d.tasks.Get(cfg.ID); ok {
   313  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   314  	}
   315  
   316  	var driverConfig TaskConfig
   317  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   318  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   319  	}
   320  
   321  	d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
   322  	handle := drivers.NewTaskHandle(taskHandleVersion)
   323  	handle.Config = cfg
   324  
   325  	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
   326  	executorConfig := &executor.ExecutorConfig{
   327  		LogFile:     pluginLogFile,
   328  		LogLevel:    "debug",
   329  		FSIsolation: true,
   330  	}
   331  
   332  	exec, pluginClient, err := executor.CreateExecutor(
   333  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID),
   334  		d.nomadConfig, executorConfig)
   335  	if err != nil {
   336  		return nil, nil, fmt.Errorf("failed to create executor: %v", err)
   337  	}
   338  
   339  	user := cfg.User
   340  	if user == "" {
   341  		user = "nobody"
   342  	}
   343  
   344  	execCmd := &executor.ExecCommand{
   345  		Cmd:            driverConfig.Command,
   346  		Args:           driverConfig.Args,
   347  		Env:            cfg.EnvList(),
   348  		User:           user,
   349  		ResourceLimits: true,
   350  		Resources:      cfg.Resources,
   351  		TaskDir:        cfg.TaskDir().Dir,
   352  		StdoutPath:     cfg.StdoutPath,
   353  		StderrPath:     cfg.StderrPath,
   354  		Mounts:         cfg.Mounts,
   355  		Devices:        cfg.Devices,
   356  	}
   357  
   358  	ps, err := exec.Launch(execCmd)
   359  	if err != nil {
   360  		pluginClient.Kill()
   361  		return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
   362  	}
   363  
   364  	h := &taskHandle{
   365  		exec:         exec,
   366  		pid:          ps.Pid,
   367  		pluginClient: pluginClient,
   368  		taskConfig:   cfg,
   369  		procState:    drivers.TaskStateRunning,
   370  		startedAt:    time.Now().Round(time.Millisecond),
   371  		logger:       d.logger,
   372  	}
   373  
   374  	driverState := TaskState{
   375  		ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
   376  		Pid:            ps.Pid,
   377  		TaskConfig:     cfg,
   378  		StartedAt:      h.startedAt,
   379  	}
   380  
   381  	if err := handle.SetDriverState(&driverState); err != nil {
   382  		d.logger.Error("failed to start task, error setting driver state", "error", err)
   383  		exec.Shutdown("", 0)
   384  		pluginClient.Kill()
   385  		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
   386  	}
   387  
   388  	d.tasks.Set(cfg.ID, h)
   389  	go h.run()
   390  	return handle, nil, nil
   391  }
   392  
   393  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
   394  	handle, ok := d.tasks.Get(taskID)
   395  	if !ok {
   396  		return nil, drivers.ErrTaskNotFound
   397  	}
   398  
   399  	ch := make(chan *drivers.ExitResult)
   400  	go d.handleWait(ctx, handle, ch)
   401  
   402  	return ch, nil
   403  }
   404  
   405  func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
   406  	defer close(ch)
   407  	var result *drivers.ExitResult
   408  	ps, err := handle.exec.Wait(ctx)
   409  	if err != nil {
   410  		result = &drivers.ExitResult{
   411  			Err: fmt.Errorf("executor: error waiting on process: %v", err),
   412  		}
   413  	} else {
   414  		result = &drivers.ExitResult{
   415  			ExitCode: ps.ExitCode,
   416  			Signal:   ps.Signal,
   417  		}
   418  	}
   419  
   420  	select {
   421  	case <-ctx.Done():
   422  		return
   423  	case <-d.ctx.Done():
   424  		return
   425  	case ch <- result:
   426  	}
   427  }
   428  
   429  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
   430  	handle, ok := d.tasks.Get(taskID)
   431  	if !ok {
   432  		return drivers.ErrTaskNotFound
   433  	}
   434  
   435  	if err := handle.exec.Shutdown(signal, timeout); err != nil {
   436  		if handle.pluginClient.Exited() {
   437  			return nil
   438  		}
   439  		return fmt.Errorf("executor Shutdown failed: %v", err)
   440  	}
   441  
   442  	return nil
   443  }
   444  
   445  func (d *Driver) DestroyTask(taskID string, force bool) error {
   446  	handle, ok := d.tasks.Get(taskID)
   447  	if !ok {
   448  		return drivers.ErrTaskNotFound
   449  	}
   450  
   451  	if handle.IsRunning() && !force {
   452  		return fmt.Errorf("cannot destroy running task")
   453  	}
   454  
   455  	if !handle.pluginClient.Exited() {
   456  		if handle.IsRunning() {
   457  			if err := handle.exec.Shutdown("", 0); err != nil {
   458  				handle.logger.Error("destroying executor failed", "err", err)
   459  			}
   460  		}
   461  
   462  		handle.pluginClient.Kill()
   463  	}
   464  
   465  	d.tasks.Delete(taskID)
   466  	return nil
   467  }
   468  
   469  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
   470  	handle, ok := d.tasks.Get(taskID)
   471  	if !ok {
   472  		return nil, drivers.ErrTaskNotFound
   473  	}
   474  
   475  	return handle.TaskStatus(), nil
   476  }
   477  
   478  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
   479  	handle, ok := d.tasks.Get(taskID)
   480  	if !ok {
   481  		return nil, drivers.ErrTaskNotFound
   482  	}
   483  
   484  	return handle.exec.Stats(ctx, interval)
   485  }
   486  
   487  func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
   488  	return d.eventer.TaskEvents(ctx)
   489  }
   490  
   491  func (d *Driver) SignalTask(taskID string, signal string) error {
   492  	handle, ok := d.tasks.Get(taskID)
   493  	if !ok {
   494  		return drivers.ErrTaskNotFound
   495  	}
   496  
   497  	sig := os.Interrupt
   498  	if s, ok := signals.SignalLookup[signal]; ok {
   499  		sig = s
   500  	} else {
   501  		d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
   502  
   503  	}
   504  	return handle.exec.Signal(sig)
   505  }
   506  
   507  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
   508  	if len(cmd) == 0 {
   509  		return nil, fmt.Errorf("error cmd must have at least one value")
   510  	}
   511  	handle, ok := d.tasks.Get(taskID)
   512  	if !ok {
   513  		return nil, drivers.ErrTaskNotFound
   514  	}
   515  
   516  	args := []string{}
   517  	if len(cmd) > 1 {
   518  		args = cmd[1:]
   519  	}
   520  
   521  	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args)
   522  	if err != nil {
   523  		return nil, err
   524  	}
   525  
   526  	return &drivers.ExecTaskResult{
   527  		Stdout: out,
   528  		ExitResult: &drivers.ExitResult{
   529  			ExitCode: exitCode,
   530  		},
   531  	}, nil
   532  }
   533  
   534  var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil)
   535  
   536  func (d *Driver) ExecTaskStreamingRaw(ctx context.Context,
   537  	taskID string,
   538  	command []string,
   539  	tty bool,
   540  	stream drivers.ExecTaskStream) error {
   541  
   542  	if len(command) == 0 {
   543  		return fmt.Errorf("error cmd must have at least one value")
   544  	}
   545  	handle, ok := d.tasks.Get(taskID)
   546  	if !ok {
   547  		return drivers.ErrTaskNotFound
   548  	}
   549  
   550  	return handle.exec.ExecStreaming(ctx, command, tty, stream)
   551  }