github.com/janma/nomad@v0.11.3/drivers/rawexec/driver.go (about)

     1  package rawexec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"strconv"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/hashicorp/consul-template/signals"
    14  	hclog "github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    16  	"github.com/hashicorp/nomad/drivers/shared/executor"
    17  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    18  	"github.com/hashicorp/nomad/plugins/base"
    19  	"github.com/hashicorp/nomad/plugins/drivers"
    20  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    21  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    22  )
    23  
    24  const (
    25  	// pluginName is the name of the plugin
    26  	pluginName = "raw_exec"
    27  
    28  	// fingerprintPeriod is the interval at which the driver will send fingerprint responses
    29  	fingerprintPeriod = 30 * time.Second
    30  
    31  	// taskHandleVersion is the version of task handle which this driver sets
    32  	// and understands how to decode driver state
    33  	taskHandleVersion = 1
    34  )
    35  
    36  var (
    37  	// PluginID is the rawexec plugin metadata registered in the plugin
    38  	// catalog.
    39  	PluginID = loader.PluginID{
    40  		Name:       pluginName,
    41  		PluginType: base.PluginTypeDriver,
    42  	}
    43  
    44  	// PluginConfig is the rawexec factory function registered in the
    45  	// plugin catalog.
    46  	PluginConfig = &loader.InternalPluginConfig{
    47  		Config:  map[string]interface{}{},
    48  		Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewRawExecDriver(ctx, l) },
    49  	}
    50  
    51  	errDisabledDriver = fmt.Errorf("raw_exec is disabled")
    52  )
    53  
    54  // PluginLoader maps pre-0.9 client driver options to post-0.9 plugin options.
    55  func PluginLoader(opts map[string]string) (map[string]interface{}, error) {
    56  	conf := map[string]interface{}{}
    57  	if v, err := strconv.ParseBool(opts["driver.raw_exec.enable"]); err == nil {
    58  		conf["enabled"] = v
    59  	}
    60  	if v, err := strconv.ParseBool(opts["driver.raw_exec.no_cgroups"]); err == nil {
    61  		conf["no_cgroups"] = v
    62  	}
    63  	return conf, nil
    64  }
    65  
    66  var (
    67  	// pluginInfo is the response returned for the PluginInfo RPC
    68  	pluginInfo = &base.PluginInfoResponse{
    69  		Type:              base.PluginTypeDriver,
    70  		PluginApiVersions: []string{drivers.ApiVersion010},
    71  		PluginVersion:     "0.1.0",
    72  		Name:              pluginName,
    73  	}
    74  
    75  	// configSpec is the hcl specification returned by the ConfigSchema RPC
    76  	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    77  		"enabled": hclspec.NewDefault(
    78  			hclspec.NewAttr("enabled", "bool", false),
    79  			hclspec.NewLiteral("false"),
    80  		),
    81  		"no_cgroups": hclspec.NewDefault(
    82  			hclspec.NewAttr("no_cgroups", "bool", false),
    83  			hclspec.NewLiteral("false"),
    84  		),
    85  	})
    86  
    87  	// taskConfigSpec is the hcl specification for the driver config section of
    88  	// a task within a job. It is returned in the TaskConfigSchema RPC
    89  	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    90  		"command": hclspec.NewAttr("command", "string", true),
    91  		"args":    hclspec.NewAttr("args", "list(string)", false),
    92  	})
    93  
    94  	// capabilities is returned by the Capabilities RPC and indicates what
    95  	// optional features this driver supports
    96  	capabilities = &drivers.Capabilities{
    97  		SendSignals: true,
    98  		Exec:        true,
    99  		FSIsolation: drivers.FSIsolationNone,
   100  		NetIsolationModes: []drivers.NetIsolationMode{
   101  			drivers.NetIsolationModeHost,
   102  			drivers.NetIsolationModeGroup,
   103  		},
   104  		MountConfigs: drivers.MountConfigSupportNone,
   105  	}
   106  )
   107  
   108  // Driver is a privileged version of the exec driver. It provides no
   109  // resource isolation and just fork/execs. The Exec driver should be preferred
   110  // and this should only be used when explicitly needed.
   111  type Driver struct {
   112  	// eventer is used to handle multiplexing of TaskEvents calls such that an
   113  	// event can be broadcast to all callers
   114  	eventer *eventer.Eventer
   115  
   116  	// config is the driver configuration set by the SetConfig RPC
   117  	config *Config
   118  
   119  	// nomadConfig is the client config from nomad
   120  	nomadConfig *base.ClientDriverConfig
   121  
   122  	// tasks is the in memory datastore mapping taskIDs to driverHandles
   123  	tasks *taskStore
   124  
   125  	// ctx is the context for the driver. It is passed to other subsystems to
   126  	// coordinate shutdown
   127  	ctx context.Context
   128  
   129  	// logger will log to the Nomad agent
   130  	logger hclog.Logger
   131  }
   132  
   133  // Config is the driver configuration set by the SetConfig RPC call
   134  type Config struct {
   135  	// NoCgroups tracks whether we should use a cgroup to manage the process
   136  	// tree
   137  	NoCgroups bool `codec:"no_cgroups"`
   138  
   139  	// Enabled is set to true to enable the raw_exec driver
   140  	Enabled bool `codec:"enabled"`
   141  }
   142  
   143  // TaskConfig is the driver configuration of a task within a job
   144  type TaskConfig struct {
   145  	Command string   `codec:"command"`
   146  	Args    []string `codec:"args"`
   147  }
   148  
   149  // TaskState is the state which is encoded in the handle returned in
   150  // StartTask. This information is needed to rebuild the task state and handler
   151  // during recovery.
   152  type TaskState struct {
   153  	ReattachConfig *pstructs.ReattachConfig
   154  	TaskConfig     *drivers.TaskConfig
   155  	Pid            int
   156  	StartedAt      time.Time
   157  }
   158  
   159  // NewRawExecDriver returns a new DriverPlugin implementation
   160  func NewRawExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin {
   161  	logger = logger.Named(pluginName)
   162  	return &Driver{
   163  		eventer: eventer.NewEventer(ctx, logger),
   164  		config:  &Config{},
   165  		tasks:   newTaskStore(),
   166  		ctx:     ctx,
   167  		logger:  logger,
   168  	}
   169  }
   170  
   171  func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
   172  	return pluginInfo, nil
   173  }
   174  
   175  func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
   176  	return configSpec, nil
   177  }
   178  
   179  func (d *Driver) SetConfig(cfg *base.Config) error {
   180  	var config Config
   181  	if len(cfg.PluginConfig) != 0 {
   182  		if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
   183  			return err
   184  		}
   185  	}
   186  
   187  	d.config = &config
   188  	if cfg.AgentConfig != nil {
   189  		d.nomadConfig = cfg.AgentConfig.Driver
   190  	}
   191  	return nil
   192  }
   193  
   194  func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
   195  	return taskConfigSpec, nil
   196  }
   197  
   198  func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
   199  	return capabilities, nil
   200  }
   201  
   202  func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
   203  	ch := make(chan *drivers.Fingerprint)
   204  	go d.handleFingerprint(ctx, ch)
   205  	return ch, nil
   206  }
   207  
   208  func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
   209  	defer close(ch)
   210  	ticker := time.NewTimer(0)
   211  	for {
   212  		select {
   213  		case <-ctx.Done():
   214  			return
   215  		case <-d.ctx.Done():
   216  			return
   217  		case <-ticker.C:
   218  			ticker.Reset(fingerprintPeriod)
   219  			ch <- d.buildFingerprint()
   220  		}
   221  	}
   222  }
   223  
   224  func (d *Driver) buildFingerprint() *drivers.Fingerprint {
   225  	var health drivers.HealthState
   226  	var desc string
   227  	attrs := map[string]*pstructs.Attribute{}
   228  	if d.config.Enabled {
   229  		health = drivers.HealthStateHealthy
   230  		desc = drivers.DriverHealthy
   231  		attrs["driver.raw_exec"] = pstructs.NewBoolAttribute(true)
   232  	} else {
   233  		health = drivers.HealthStateUndetected
   234  		desc = "disabled"
   235  	}
   236  
   237  	return &drivers.Fingerprint{
   238  		Attributes:        attrs,
   239  		Health:            health,
   240  		HealthDescription: desc,
   241  	}
   242  }
   243  
   244  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   245  	if handle == nil {
   246  		return fmt.Errorf("handle cannot be nil")
   247  	}
   248  
   249  	// COMPAT(0.10): pre 0.9 upgrade path check
   250  	if handle.Version == 0 {
   251  		return d.recoverPre09Task(handle)
   252  	}
   253  
   254  	// If already attached to handle there's nothing to recover.
   255  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   256  		d.logger.Trace("nothing to recover; task already exists",
   257  			"task_id", handle.Config.ID,
   258  			"task_name", handle.Config.Name,
   259  		)
   260  		return nil
   261  	}
   262  
   263  	// Handle doesn't already exist, try to reattach
   264  	var taskState TaskState
   265  	if err := handle.GetDriverState(&taskState); err != nil {
   266  		d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID)
   267  		return fmt.Errorf("failed to decode task state from handle: %v", err)
   268  	}
   269  
   270  	plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
   271  	if err != nil {
   272  		d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
   273  		return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
   274  	}
   275  
   276  	// Create client for reattached executor
   277  	exec, pluginClient, err := executor.ReattachToExecutor(plugRC,
   278  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
   279  	if err != nil {
   280  		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
   281  		return fmt.Errorf("failed to reattach to executor: %v", err)
   282  	}
   283  
   284  	h := &taskHandle{
   285  		exec:         exec,
   286  		pid:          taskState.Pid,
   287  		pluginClient: pluginClient,
   288  		taskConfig:   taskState.TaskConfig,
   289  		procState:    drivers.TaskStateRunning,
   290  		startedAt:    taskState.StartedAt,
   291  		exitResult:   &drivers.ExitResult{},
   292  		logger:       d.logger,
   293  		doneCh:       make(chan struct{}),
   294  	}
   295  
   296  	d.tasks.Set(taskState.TaskConfig.ID, h)
   297  
   298  	go h.run()
   299  	return nil
   300  }
   301  
   302  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   303  	if !d.config.Enabled {
   304  		return nil, nil, errDisabledDriver
   305  	}
   306  
   307  	if _, ok := d.tasks.Get(cfg.ID); ok {
   308  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   309  	}
   310  
   311  	var driverConfig TaskConfig
   312  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   313  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   314  	}
   315  
   316  	d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
   317  	handle := drivers.NewTaskHandle(taskHandleVersion)
   318  	handle.Config = cfg
   319  
   320  	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
   321  	executorConfig := &executor.ExecutorConfig{
   322  		LogFile:  pluginLogFile,
   323  		LogLevel: "debug",
   324  	}
   325  
   326  	exec, pluginClient, err := executor.CreateExecutor(
   327  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID),
   328  		d.nomadConfig, executorConfig)
   329  	if err != nil {
   330  		return nil, nil, fmt.Errorf("failed to create executor: %v", err)
   331  	}
   332  
   333  	// Only use cgroups when running as root on linux - Doing so in other cases
   334  	// will cause an error.
   335  	useCgroups := !d.config.NoCgroups && runtime.GOOS == "linux" && syscall.Geteuid() == 0
   336  
   337  	execCmd := &executor.ExecCommand{
   338  		Cmd:                driverConfig.Command,
   339  		Args:               driverConfig.Args,
   340  		Env:                cfg.EnvList(),
   341  		User:               cfg.User,
   342  		BasicProcessCgroup: useCgroups,
   343  		TaskDir:            cfg.TaskDir().Dir,
   344  		StdoutPath:         cfg.StdoutPath,
   345  		StderrPath:         cfg.StderrPath,
   346  		NetworkIsolation:   cfg.NetworkIsolation,
   347  	}
   348  
   349  	ps, err := exec.Launch(execCmd)
   350  	if err != nil {
   351  		pluginClient.Kill()
   352  		return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
   353  	}
   354  
   355  	h := &taskHandle{
   356  		exec:         exec,
   357  		pid:          ps.Pid,
   358  		pluginClient: pluginClient,
   359  		taskConfig:   cfg,
   360  		procState:    drivers.TaskStateRunning,
   361  		startedAt:    time.Now().Round(time.Millisecond),
   362  		logger:       d.logger,
   363  		doneCh:       make(chan struct{}),
   364  	}
   365  
   366  	driverState := TaskState{
   367  		ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
   368  		Pid:            ps.Pid,
   369  		TaskConfig:     cfg,
   370  		StartedAt:      h.startedAt,
   371  	}
   372  
   373  	if err := handle.SetDriverState(&driverState); err != nil {
   374  		d.logger.Error("failed to start task, error setting driver state", "error", err)
   375  		exec.Shutdown("", 0)
   376  		pluginClient.Kill()
   377  		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
   378  	}
   379  
   380  	d.tasks.Set(cfg.ID, h)
   381  	go h.run()
   382  	return handle, nil, nil
   383  }
   384  
   385  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
   386  	handle, ok := d.tasks.Get(taskID)
   387  	if !ok {
   388  		return nil, drivers.ErrTaskNotFound
   389  	}
   390  
   391  	ch := make(chan *drivers.ExitResult)
   392  	go d.handleWait(ctx, handle, ch)
   393  
   394  	return ch, nil
   395  }
   396  
   397  func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
   398  	defer close(ch)
   399  	var result *drivers.ExitResult
   400  	ps, err := handle.exec.Wait(ctx)
   401  	if err != nil {
   402  		result = &drivers.ExitResult{
   403  			Err: fmt.Errorf("executor: error waiting on process: %v", err),
   404  		}
   405  	} else {
   406  		result = &drivers.ExitResult{
   407  			ExitCode: ps.ExitCode,
   408  			Signal:   ps.Signal,
   409  		}
   410  	}
   411  
   412  	select {
   413  	case <-ctx.Done():
   414  		return
   415  	case <-d.ctx.Done():
   416  		return
   417  	case ch <- result:
   418  	}
   419  }
   420  
   421  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
   422  	handle, ok := d.tasks.Get(taskID)
   423  	if !ok {
   424  		return drivers.ErrTaskNotFound
   425  	}
   426  
   427  	if err := handle.exec.Shutdown(signal, timeout); err != nil {
   428  		if handle.pluginClient.Exited() {
   429  			return nil
   430  		}
   431  		return fmt.Errorf("executor Shutdown failed: %v", err)
   432  	}
   433  
   434  	// Wait for handle to finish
   435  	<-handle.doneCh
   436  
   437  	// Kill executor
   438  	handle.pluginClient.Kill()
   439  
   440  	return nil
   441  }
   442  
   443  func (d *Driver) DestroyTask(taskID string, force bool) error {
   444  	handle, ok := d.tasks.Get(taskID)
   445  	if !ok {
   446  		return drivers.ErrTaskNotFound
   447  	}
   448  
   449  	if handle.IsRunning() && !force {
   450  		return fmt.Errorf("cannot destroy running task")
   451  	}
   452  
   453  	if !handle.pluginClient.Exited() {
   454  		if err := handle.exec.Shutdown("", 0); err != nil {
   455  			handle.logger.Error("destroying executor failed", "err", err)
   456  		}
   457  
   458  		handle.pluginClient.Kill()
   459  	}
   460  
   461  	d.tasks.Delete(taskID)
   462  	return nil
   463  }
   464  
   465  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
   466  	handle, ok := d.tasks.Get(taskID)
   467  	if !ok {
   468  		return nil, drivers.ErrTaskNotFound
   469  	}
   470  
   471  	return handle.TaskStatus(), nil
   472  }
   473  
   474  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
   475  	handle, ok := d.tasks.Get(taskID)
   476  	if !ok {
   477  		return nil, drivers.ErrTaskNotFound
   478  	}
   479  
   480  	return handle.exec.Stats(ctx, interval)
   481  }
   482  
   483  func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
   484  	return d.eventer.TaskEvents(ctx)
   485  }
   486  
   487  func (d *Driver) SignalTask(taskID string, signal string) error {
   488  	handle, ok := d.tasks.Get(taskID)
   489  	if !ok {
   490  		return drivers.ErrTaskNotFound
   491  	}
   492  
   493  	sig := os.Interrupt
   494  	if s, ok := signals.SignalLookup[signal]; ok {
   495  		sig = s
   496  	} else {
   497  		d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
   498  	}
   499  
   500  	return handle.exec.Signal(sig)
   501  }
   502  
   503  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
   504  	if len(cmd) == 0 {
   505  		return nil, fmt.Errorf("error cmd must have at least one value")
   506  	}
   507  	handle, ok := d.tasks.Get(taskID)
   508  	if !ok {
   509  		return nil, drivers.ErrTaskNotFound
   510  	}
   511  
   512  	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], cmd[1:])
   513  	if err != nil {
   514  		return nil, err
   515  	}
   516  
   517  	return &drivers.ExecTaskResult{
   518  		Stdout: out,
   519  		ExitResult: &drivers.ExitResult{
   520  			ExitCode: exitCode,
   521  		},
   522  	}, nil
   523  }
   524  
   525  var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil)
   526  
   527  func (d *Driver) ExecTaskStreamingRaw(ctx context.Context,
   528  	taskID string,
   529  	command []string,
   530  	tty bool,
   531  	stream drivers.ExecTaskStream) error {
   532  
   533  	if len(command) == 0 {
   534  		return fmt.Errorf("error cmd must have at least one value")
   535  	}
   536  	handle, ok := d.tasks.Get(taskID)
   537  	if !ok {
   538  		return drivers.ErrTaskNotFound
   539  	}
   540  
   541  	return handle.exec.ExecStreaming(ctx, command, tty, stream)
   542  }