github.com/bigcommerce/nomad@v0.9.3-bc/drivers/rawexec/driver.go (about)

     1  package rawexec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"strconv"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/hashicorp/consul-template/signals"
    14  	hclog "github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    16  	"github.com/hashicorp/nomad/drivers/shared/executor"
    17  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    18  	"github.com/hashicorp/nomad/plugins/base"
    19  	"github.com/hashicorp/nomad/plugins/drivers"
    20  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    21  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    22  )
    23  
const (
	// pluginName is the name of the plugin. It is used in PluginID and
	// pluginInfo, and as the name of the driver's logger.
	pluginName = "raw_exec"

	// fingerprintPeriod is the interval at which the driver will send
	// fingerprint responses; handleFingerprint re-arms its timer with it.
	fingerprintPeriod = 30 * time.Second

	// taskHandleVersion is the version of task handle which this driver sets
	// and understands how to decode driver state. StartTask stamps new
	// handles with it; RecoverTask treats version 0 as a pre-0.9 handle.
	taskHandleVersion = 1
)
    35  
var (
	// PluginID is the rawexec plugin metadata registered in the plugin
	// catalog. It identifies the plugin by name and marks it as a driver.
	PluginID = loader.PluginID{
		Name:       pluginName,
		PluginType: base.PluginTypeDriver,
	}

	// PluginConfig is the rawexec factory function registered in the
	// plugin catalog. It carries an empty default config map, and its
	// Factory builds the driver through NewRawExecDriver.
	PluginConfig = &loader.InternalPluginConfig{
		Config:  map[string]interface{}{},
		Factory: func(l hclog.Logger) interface{} { return NewRawExecDriver(l) },
	}
)
    51  
    52  // PluginLoader maps pre-0.9 client driver options to post-0.9 plugin options.
    53  func PluginLoader(opts map[string]string) (map[string]interface{}, error) {
    54  	conf := map[string]interface{}{}
    55  	if v, err := strconv.ParseBool(opts["driver.raw_exec.enable"]); err == nil {
    56  		conf["enabled"] = v
    57  	}
    58  	if v, err := strconv.ParseBool(opts["driver.raw_exec.no_cgroups"]); err == nil {
    59  		conf["no_cgroups"] = v
    60  	}
    61  	return conf, nil
    62  }
    63  
var (
	// pluginInfo is the response returned for the PluginInfo RPC
	pluginInfo = &base.PluginInfoResponse{
		Type:              base.PluginTypeDriver,
		PluginApiVersions: []string{drivers.ApiVersion010},
		PluginVersion:     "0.1.0",
		Name:              pluginName,
	}

	// configSpec is the hcl specification returned by the ConfigSchema RPC.
	// It declares two bool attributes, "enabled" and "no_cgroups", each
	// wrapped in NewDefault with the literal false.
	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
		"enabled": hclspec.NewDefault(
			hclspec.NewAttr("enabled", "bool", false),
			hclspec.NewLiteral("false"),
		),
		"no_cgroups": hclspec.NewDefault(
			hclspec.NewAttr("no_cgroups", "bool", false),
			hclspec.NewLiteral("false"),
		),
	})

	// taskConfigSpec is the hcl specification for the driver config section of
	// a task within a job. It is returned in the TaskConfigSchema RPC.
	// NOTE(review): the trailing bool passed to NewAttr is true for "command"
	// and false for "args" — presumably the "required" flag; confirm against
	// hclspec.NewAttr.
	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
		"command": hclspec.NewAttr("command", "string", true),
		"args":    hclspec.NewAttr("args", "list(string)", false),
	})

	// capabilities is returned by the Capabilities RPC and indicates what
	// optional features this driver supports: signals and exec are
	// advertised, and filesystem isolation is reported as none.
	capabilities = &drivers.Capabilities{
		SendSignals: true,
		Exec:        true,
		FSIsolation: drivers.FSIsolationNone,
	}
)
   100  
// Driver is a privileged version of the exec driver. It provides no
// resource isolation and just fork/execs. The Exec driver should be preferred
// and this should only be used when explicitly needed.
//
// Instances are created with NewRawExecDriver.
type Driver struct {
	// eventer is used to handle multiplexing of TaskEvents calls such that an
	// event can be broadcast to all callers
	eventer *eventer.Eventer

	// config is the driver configuration set by the SetConfig RPC. It starts
	// out as an empty Config and is replaced wholesale on every SetConfig.
	config *Config

	// nomadConfig is the client config from nomad. It is only assigned when
	// SetConfig receives a non-nil AgentConfig.
	nomadConfig *base.ClientDriverConfig

	// tasks is the in memory datastore mapping taskIDs to driverHandles
	tasks *taskStore

	// ctx is the context for the driver. It is passed to other subsystems to
	// coordinate shutdown
	ctx context.Context

	// signalShutdown is called when the driver is shutting down and cancels the
	// ctx passed to any subsystems
	signalShutdown context.CancelFunc

	// logger will log to the Nomad agent. It is named after the plugin.
	logger hclog.Logger
}
   129  
// Config is the driver configuration set by the SetConfig RPC call. It is
// decoded from the msgpack-encoded plugin config using the codec tags below.
type Config struct {
	// NoCgroups tracks whether we should use a cgroup to manage the process
	// tree. When true, StartTask never asks the executor for a cgroup.
	NoCgroups bool `codec:"no_cgroups"`

	// Enabled is set to true to enable the raw_exec driver. The fingerprint
	// reports the driver as healthy only when this is true.
	Enabled bool `codec:"enabled"`
}
   139  
// TaskConfig is the driver configuration of a task within a job
type TaskConfig struct {
	// Command is passed to the executor as the command to launch.
	Command string `codec:"command"`
	// Args are passed to the executor as the command's arguments.
	Args []string `codec:"args"`
}
   145  
// TaskState is the state which is encoded in the handle returned in
// StartTask. This information is needed to rebuild the task state and handler
// during recovery.
type TaskState struct {
	// ReattachConfig is derived from the executor plugin client in StartTask
	// and used by RecoverTask to reattach to that executor.
	ReattachConfig *pstructs.ReattachConfig
	// TaskConfig is the task configuration the task was started with.
	TaskConfig *drivers.TaskConfig
	// Pid is the process ID reported by the executor when it launched the task.
	Pid int
	// StartedAt is the launch time recorded by StartTask, rounded to the
	// millisecond.
	StartedAt time.Time
}
   155  
   156  // NewRawExecDriver returns a new DriverPlugin implementation
   157  func NewRawExecDriver(logger hclog.Logger) drivers.DriverPlugin {
   158  	ctx, cancel := context.WithCancel(context.Background())
   159  	logger = logger.Named(pluginName)
   160  	return &Driver{
   161  		eventer:        eventer.NewEventer(ctx, logger),
   162  		config:         &Config{},
   163  		tasks:          newTaskStore(),
   164  		ctx:            ctx,
   165  		signalShutdown: cancel,
   166  		logger:         logger,
   167  	}
   168  }
   169  
// PluginInfo returns the static plugin metadata held in pluginInfo (type,
// supported API versions, plugin version and name). The error is always nil.
func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
	return pluginInfo, nil
}
   173  
// ConfigSchema returns the hcl specification of the driver's plugin
// configuration (configSpec). The error is always nil.
func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
	return configSpec, nil
}
   177  
   178  func (d *Driver) SetConfig(cfg *base.Config) error {
   179  	var config Config
   180  	if len(cfg.PluginConfig) != 0 {
   181  		if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
   182  			return err
   183  		}
   184  	}
   185  
   186  	d.config = &config
   187  	if cfg.AgentConfig != nil {
   188  		d.nomadConfig = cfg.AgentConfig.Driver
   189  	}
   190  	return nil
   191  }
   192  
// Shutdown cancels the driver context. Goroutines selecting on d.ctx, such
// as handleFingerprint and handleWait, return once it is cancelled; the same
// context was handed to the eventer in NewRawExecDriver.
func (d *Driver) Shutdown() {
	d.signalShutdown()
}
   196  
// TaskConfigSchema returns the hcl specification of a task's driver config
// section (taskConfigSpec). The error is always nil.
func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
	return taskConfigSpec, nil
}
   200  
// Capabilities returns the package-level capabilities value describing the
// optional features this driver supports. The error is always nil.
func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
	return capabilities, nil
}
   204  
   205  func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
   206  	ch := make(chan *drivers.Fingerprint)
   207  	go d.handleFingerprint(ctx, ch)
   208  	return ch, nil
   209  }
   210  
   211  func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
   212  	defer close(ch)
   213  	ticker := time.NewTimer(0)
   214  	for {
   215  		select {
   216  		case <-ctx.Done():
   217  			return
   218  		case <-d.ctx.Done():
   219  			return
   220  		case <-ticker.C:
   221  			ticker.Reset(fingerprintPeriod)
   222  			ch <- d.buildFingerprint()
   223  		}
   224  	}
   225  }
   226  
   227  func (d *Driver) buildFingerprint() *drivers.Fingerprint {
   228  	var health drivers.HealthState
   229  	var desc string
   230  	attrs := map[string]*pstructs.Attribute{}
   231  	if d.config.Enabled {
   232  		health = drivers.HealthStateHealthy
   233  		desc = drivers.DriverHealthy
   234  		attrs["driver.raw_exec"] = pstructs.NewBoolAttribute(true)
   235  	} else {
   236  		health = drivers.HealthStateUndetected
   237  		desc = "disabled"
   238  	}
   239  
   240  	return &drivers.Fingerprint{
   241  		Attributes:        attrs,
   242  		Health:            health,
   243  		HealthDescription: desc,
   244  	}
   245  }
   246  
   247  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   248  	if handle == nil {
   249  		return fmt.Errorf("handle cannot be nil")
   250  	}
   251  
   252  	// COMPAT(0.10): pre 0.9 upgrade path check
   253  	if handle.Version == 0 {
   254  		return d.recoverPre09Task(handle)
   255  	}
   256  
   257  	// If already attached to handle there's nothing to recover.
   258  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   259  		d.logger.Trace("nothing to recover; task already exists",
   260  			"task_id", handle.Config.ID,
   261  			"task_name", handle.Config.Name,
   262  		)
   263  		return nil
   264  	}
   265  
   266  	// Handle doesn't already exist, try to reattach
   267  	var taskState TaskState
   268  	if err := handle.GetDriverState(&taskState); err != nil {
   269  		d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID)
   270  		return fmt.Errorf("failed to decode task state from handle: %v", err)
   271  	}
   272  
   273  	plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
   274  	if err != nil {
   275  		d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
   276  		return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
   277  	}
   278  
   279  	// Create client for reattached executor
   280  	exec, pluginClient, err := executor.ReattachToExecutor(plugRC,
   281  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
   282  	if err != nil {
   283  		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
   284  		return fmt.Errorf("failed to reattach to executor: %v", err)
   285  	}
   286  
   287  	h := &taskHandle{
   288  		exec:         exec,
   289  		pid:          taskState.Pid,
   290  		pluginClient: pluginClient,
   291  		taskConfig:   taskState.TaskConfig,
   292  		procState:    drivers.TaskStateRunning,
   293  		startedAt:    taskState.StartedAt,
   294  		exitResult:   &drivers.ExitResult{},
   295  		logger:       d.logger,
   296  		doneCh:       make(chan struct{}),
   297  	}
   298  
   299  	d.tasks.Set(taskState.TaskConfig.ID, h)
   300  
   301  	go h.run()
   302  	return nil
   303  }
   304  
   305  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   306  	if _, ok := d.tasks.Get(cfg.ID); ok {
   307  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   308  	}
   309  
   310  	var driverConfig TaskConfig
   311  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   312  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   313  	}
   314  
   315  	d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
   316  	handle := drivers.NewTaskHandle(taskHandleVersion)
   317  	handle.Config = cfg
   318  
   319  	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
   320  	executorConfig := &executor.ExecutorConfig{
   321  		LogFile:  pluginLogFile,
   322  		LogLevel: "debug",
   323  	}
   324  
   325  	exec, pluginClient, err := executor.CreateExecutor(
   326  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID),
   327  		d.nomadConfig, executorConfig)
   328  	if err != nil {
   329  		return nil, nil, fmt.Errorf("failed to create executor: %v", err)
   330  	}
   331  
   332  	// Only use cgroups when running as root on linux - Doing so in other cases
   333  	// will cause an error.
   334  	useCgroups := !d.config.NoCgroups && runtime.GOOS == "linux" && syscall.Geteuid() == 0
   335  
   336  	execCmd := &executor.ExecCommand{
   337  		Cmd:                driverConfig.Command,
   338  		Args:               driverConfig.Args,
   339  		Env:                cfg.EnvList(),
   340  		User:               cfg.User,
   341  		BasicProcessCgroup: useCgroups,
   342  		TaskDir:            cfg.TaskDir().Dir,
   343  		StdoutPath:         cfg.StdoutPath,
   344  		StderrPath:         cfg.StderrPath,
   345  	}
   346  
   347  	ps, err := exec.Launch(execCmd)
   348  	if err != nil {
   349  		pluginClient.Kill()
   350  		return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
   351  	}
   352  
   353  	h := &taskHandle{
   354  		exec:         exec,
   355  		pid:          ps.Pid,
   356  		pluginClient: pluginClient,
   357  		taskConfig:   cfg,
   358  		procState:    drivers.TaskStateRunning,
   359  		startedAt:    time.Now().Round(time.Millisecond),
   360  		logger:       d.logger,
   361  		doneCh:       make(chan struct{}),
   362  	}
   363  
   364  	driverState := TaskState{
   365  		ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
   366  		Pid:            ps.Pid,
   367  		TaskConfig:     cfg,
   368  		StartedAt:      h.startedAt,
   369  	}
   370  
   371  	if err := handle.SetDriverState(&driverState); err != nil {
   372  		d.logger.Error("failed to start task, error setting driver state", "error", err)
   373  		exec.Shutdown("", 0)
   374  		pluginClient.Kill()
   375  		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
   376  	}
   377  
   378  	d.tasks.Set(cfg.ID, h)
   379  	go h.run()
   380  	return handle, nil, nil
   381  }
   382  
   383  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
   384  	handle, ok := d.tasks.Get(taskID)
   385  	if !ok {
   386  		return nil, drivers.ErrTaskNotFound
   387  	}
   388  
   389  	ch := make(chan *drivers.ExitResult)
   390  	go d.handleWait(ctx, handle, ch)
   391  
   392  	return ch, nil
   393  }
   394  
   395  func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
   396  	defer close(ch)
   397  	var result *drivers.ExitResult
   398  	ps, err := handle.exec.Wait(ctx)
   399  	if err != nil {
   400  		result = &drivers.ExitResult{
   401  			Err: fmt.Errorf("executor: error waiting on process: %v", err),
   402  		}
   403  	} else {
   404  		result = &drivers.ExitResult{
   405  			ExitCode: ps.ExitCode,
   406  			Signal:   ps.Signal,
   407  		}
   408  	}
   409  
   410  	select {
   411  	case <-ctx.Done():
   412  		return
   413  	case <-d.ctx.Done():
   414  		return
   415  	case ch <- result:
   416  	}
   417  }
   418  
   419  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
   420  	handle, ok := d.tasks.Get(taskID)
   421  	if !ok {
   422  		return drivers.ErrTaskNotFound
   423  	}
   424  
   425  	if err := handle.exec.Shutdown(signal, timeout); err != nil {
   426  		if handle.pluginClient.Exited() {
   427  			return nil
   428  		}
   429  		return fmt.Errorf("executor Shutdown failed: %v", err)
   430  	}
   431  
   432  	// Wait for handle to finish
   433  	<-handle.doneCh
   434  
   435  	// Kill executor
   436  	handle.pluginClient.Kill()
   437  
   438  	return nil
   439  }
   440  
   441  func (d *Driver) DestroyTask(taskID string, force bool) error {
   442  	handle, ok := d.tasks.Get(taskID)
   443  	if !ok {
   444  		return drivers.ErrTaskNotFound
   445  	}
   446  
   447  	if handle.IsRunning() && !force {
   448  		return fmt.Errorf("cannot destroy running task")
   449  	}
   450  
   451  	if !handle.pluginClient.Exited() {
   452  		if handle.IsRunning() {
   453  			if err := handle.exec.Shutdown("", 0); err != nil {
   454  				handle.logger.Error("destroying executor failed", "err", err)
   455  			}
   456  		}
   457  
   458  		handle.pluginClient.Kill()
   459  	}
   460  
   461  	d.tasks.Delete(taskID)
   462  	return nil
   463  }
   464  
   465  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
   466  	handle, ok := d.tasks.Get(taskID)
   467  	if !ok {
   468  		return nil, drivers.ErrTaskNotFound
   469  	}
   470  
   471  	return handle.TaskStatus(), nil
   472  }
   473  
   474  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
   475  	handle, ok := d.tasks.Get(taskID)
   476  	if !ok {
   477  		return nil, drivers.ErrTaskNotFound
   478  	}
   479  
   480  	return handle.exec.Stats(ctx, interval)
   481  }
   482  
// TaskEvents delegates to the driver's eventer and returns whatever channel
// and error it produces for ctx.
func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
	return d.eventer.TaskEvents(ctx)
}
   486  
   487  func (d *Driver) SignalTask(taskID string, signal string) error {
   488  	handle, ok := d.tasks.Get(taskID)
   489  	if !ok {
   490  		return drivers.ErrTaskNotFound
   491  	}
   492  
   493  	sig := os.Interrupt
   494  	if s, ok := signals.SignalLookup[signal]; ok {
   495  		sig = s
   496  	} else {
   497  		d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
   498  	}
   499  
   500  	return handle.exec.Signal(sig)
   501  }
   502  
   503  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
   504  	if len(cmd) == 0 {
   505  		return nil, fmt.Errorf("error cmd must have at least one value")
   506  	}
   507  	handle, ok := d.tasks.Get(taskID)
   508  	if !ok {
   509  		return nil, drivers.ErrTaskNotFound
   510  	}
   511  
   512  	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], cmd[1:])
   513  	if err != nil {
   514  		return nil, err
   515  	}
   516  
   517  	return &drivers.ExecTaskResult{
   518  		Stdout: out,
   519  		ExitResult: &drivers.ExitResult{
   520  			ExitCode: exitCode,
   521  		},
   522  	}, nil
   523  }
   524  
// Compile-time assertion that *Driver implements
// drivers.ExecTaskStreamingRawDriver.
var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil)
   526  
   527  func (d *Driver) ExecTaskStreamingRaw(ctx context.Context,
   528  	taskID string,
   529  	command []string,
   530  	tty bool,
   531  	stream drivers.ExecTaskStream) error {
   532  
   533  	if len(command) == 0 {
   534  		return fmt.Errorf("error cmd must have at least one value")
   535  	}
   536  	handle, ok := d.tasks.Get(taskID)
   537  	if !ok {
   538  		return drivers.ErrTaskNotFound
   539  	}
   540  
   541  	return handle.exec.ExecStreaming(ctx, command, tty, stream)
   542  }