github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/drivers/rawexec/driver.go (about)

     1  package rawexec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"strconv"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/hashicorp/consul-template/signals"
    14  	"github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    16  	"github.com/hashicorp/nomad/drivers/shared/executor"
    17  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    18  	"github.com/hashicorp/nomad/plugins/base"
    19  	"github.com/hashicorp/nomad/plugins/drivers"
    20  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    21  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    22  )
    23  
    24  const (
    25  	// pluginName is the name of the plugin
    26  	pluginName = "raw_exec"
    27  
    28  	// fingerprintPeriod is the interval at which the driver will send fingerprint responses
    29  	fingerprintPeriod = 30 * time.Second
    30  
    31  	// taskHandleVersion is the version of task handle which this driver sets
    32  	// and understands how to decode driver state
    33  	taskHandleVersion = 1
    34  )
    35  
    36  var (
    37  	// PluginID is the rawexec plugin metadata registered in the plugin
    38  	// catalog.
    39  	PluginID = loader.PluginID{
    40  		Name:       pluginName,
    41  		PluginType: base.PluginTypeDriver,
    42  	}
    43  
    44  	// PluginConfig is the rawexec factory function registered in the
    45  	// plugin catalog.
    46  	PluginConfig = &loader.InternalPluginConfig{
    47  		Config:  map[string]interface{}{},
    48  		Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewRawExecDriver(ctx, l) },
    49  	}
    50  
    51  	errDisabledDriver = fmt.Errorf("raw_exec is disabled")
    52  )
    53  
    54  // PluginLoader maps pre-0.9 client driver options to post-0.9 plugin options.
    55  func PluginLoader(opts map[string]string) (map[string]interface{}, error) {
    56  	conf := map[string]interface{}{}
    57  	if v, err := strconv.ParseBool(opts["driver.raw_exec.enable"]); err == nil {
    58  		conf["enabled"] = v
    59  	}
    60  	if v, err := strconv.ParseBool(opts["driver.raw_exec.no_cgroups"]); err == nil {
    61  		conf["no_cgroups"] = v
    62  	}
    63  	return conf, nil
    64  }
    65  
    66  var (
    67  	// pluginInfo is the response returned for the PluginInfo RPC
    68  	pluginInfo = &base.PluginInfoResponse{
    69  		Type:              base.PluginTypeDriver,
    70  		PluginApiVersions: []string{drivers.ApiVersion010},
    71  		PluginVersion:     "0.1.0",
    72  		Name:              pluginName,
    73  	}
    74  
    75  	// configSpec is the hcl specification returned by the ConfigSchema RPC
    76  	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    77  		"enabled": hclspec.NewDefault(
    78  			hclspec.NewAttr("enabled", "bool", false),
    79  			hclspec.NewLiteral("false"),
    80  		),
    81  		"no_cgroups": hclspec.NewDefault(
    82  			hclspec.NewAttr("no_cgroups", "bool", false),
    83  			hclspec.NewLiteral("false"),
    84  		),
    85  	})
    86  
    87  	// taskConfigSpec is the hcl specification for the driver config section of
    88  	// a task within a job. It is returned in the TaskConfigSchema RPC
    89  	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    90  		"command": hclspec.NewAttr("command", "string", true),
    91  		"args":    hclspec.NewAttr("args", "list(string)", false),
    92  	})
    93  
    94  	// capabilities is returned by the Capabilities RPC and indicates what
    95  	// optional features this driver supports
    96  	capabilities = &drivers.Capabilities{
    97  		SendSignals: true,
    98  		Exec:        true,
    99  		FSIsolation: drivers.FSIsolationNone,
   100  		NetIsolationModes: []drivers.NetIsolationMode{
   101  			drivers.NetIsolationModeHost,
   102  			drivers.NetIsolationModeGroup,
   103  		},
   104  		MountConfigs: drivers.MountConfigSupportNone,
   105  	}
   106  )
   107  
   108  // Driver is a privileged version of the exec driver. It provides no
   109  // resource isolation and just fork/execs. The Exec driver should be preferred
   110  // and this should only be used when explicitly needed.
   111  type Driver struct {
   112  	// eventer is used to handle multiplexing of TaskEvents calls such that an
   113  	// event can be broadcast to all callers
   114  	eventer *eventer.Eventer
   115  
   116  	// config is the driver configuration set by the SetConfig RPC
   117  	config *Config
   118  
   119  	// nomadConfig is the client config from nomad
   120  	nomadConfig *base.ClientDriverConfig
   121  
   122  	// tasks is the in memory datastore mapping taskIDs to driverHandles
   123  	tasks *taskStore
   124  
   125  	// ctx is the context for the driver. It is passed to other subsystems to
   126  	// coordinate shutdown
   127  	ctx context.Context
   128  
   129  	// logger will log to the Nomad agent
   130  	logger hclog.Logger
   131  }
   132  
   133  // Config is the driver configuration set by the SetConfig RPC call
   134  type Config struct {
   135  	// NoCgroups tracks whether we should use a cgroup to manage the process
   136  	// tree
   137  	NoCgroups bool `codec:"no_cgroups"`
   138  
   139  	// Enabled is set to true to enable the raw_exec driver
   140  	Enabled bool `codec:"enabled"`
   141  }
   142  
   143  // TaskConfig is the driver configuration of a task within a job
   144  type TaskConfig struct {
   145  	Command string   `codec:"command"`
   146  	Args    []string `codec:"args"`
   147  }
   148  
   149  // TaskState is the state which is encoded in the handle returned in
   150  // StartTask. This information is needed to rebuild the task state and handler
   151  // during recovery.
   152  type TaskState struct {
   153  	ReattachConfig *pstructs.ReattachConfig
   154  	TaskConfig     *drivers.TaskConfig
   155  	Pid            int
   156  	StartedAt      time.Time
   157  }
   158  
   159  // NewRawExecDriver returns a new DriverPlugin implementation
   160  func NewRawExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin {
   161  	logger = logger.Named(pluginName)
   162  	return &Driver{
   163  		eventer: eventer.NewEventer(ctx, logger),
   164  		config:  &Config{},
   165  		tasks:   newTaskStore(),
   166  		ctx:     ctx,
   167  		logger:  logger,
   168  	}
   169  }
   170  
   171  func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
   172  	return pluginInfo, nil
   173  }
   174  
   175  func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
   176  	return configSpec, nil
   177  }
   178  
   179  func (d *Driver) SetConfig(cfg *base.Config) error {
   180  	var config Config
   181  	if len(cfg.PluginConfig) != 0 {
   182  		if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
   183  			return err
   184  		}
   185  	}
   186  
   187  	d.config = &config
   188  	if cfg.AgentConfig != nil {
   189  		d.nomadConfig = cfg.AgentConfig.Driver
   190  	}
   191  	return nil
   192  }
   193  
   194  func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
   195  	return taskConfigSpec, nil
   196  }
   197  
   198  func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
   199  	return capabilities, nil
   200  }
   201  
   202  func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
   203  	ch := make(chan *drivers.Fingerprint)
   204  	go d.handleFingerprint(ctx, ch)
   205  	return ch, nil
   206  }
   207  
   208  func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
   209  	defer close(ch)
   210  	ticker := time.NewTimer(0)
   211  	for {
   212  		select {
   213  		case <-ctx.Done():
   214  			return
   215  		case <-d.ctx.Done():
   216  			return
   217  		case <-ticker.C:
   218  			ticker.Reset(fingerprintPeriod)
   219  			ch <- d.buildFingerprint()
   220  		}
   221  	}
   222  }
   223  
   224  func (d *Driver) buildFingerprint() *drivers.Fingerprint {
   225  	var health drivers.HealthState
   226  	var desc string
   227  	attrs := map[string]*pstructs.Attribute{}
   228  	if d.config.Enabled {
   229  		health = drivers.HealthStateHealthy
   230  		desc = drivers.DriverHealthy
   231  		attrs["driver.raw_exec"] = pstructs.NewBoolAttribute(true)
   232  	} else {
   233  		health = drivers.HealthStateUndetected
   234  		desc = "disabled"
   235  	}
   236  
   237  	return &drivers.Fingerprint{
   238  		Attributes:        attrs,
   239  		Health:            health,
   240  		HealthDescription: desc,
   241  	}
   242  }
   243  
   244  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   245  	if handle == nil {
   246  		return fmt.Errorf("handle cannot be nil")
   247  	}
   248  
   249  	// If already attached to handle there's nothing to recover.
   250  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   251  		d.logger.Trace("nothing to recover; task already exists",
   252  			"task_id", handle.Config.ID,
   253  			"task_name", handle.Config.Name,
   254  		)
   255  		return nil
   256  	}
   257  
   258  	// Handle doesn't already exist, try to reattach
   259  	var taskState TaskState
   260  	if err := handle.GetDriverState(&taskState); err != nil {
   261  		d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID)
   262  		return fmt.Errorf("failed to decode task state from handle: %v", err)
   263  	}
   264  
   265  	plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
   266  	if err != nil {
   267  		d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
   268  		return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
   269  	}
   270  
   271  	// Create client for reattached executor
   272  	exec, pluginClient, err := executor.ReattachToExecutor(plugRC,
   273  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
   274  	if err != nil {
   275  		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
   276  		return fmt.Errorf("failed to reattach to executor: %v", err)
   277  	}
   278  
   279  	h := &taskHandle{
   280  		exec:         exec,
   281  		pid:          taskState.Pid,
   282  		pluginClient: pluginClient,
   283  		taskConfig:   taskState.TaskConfig,
   284  		procState:    drivers.TaskStateRunning,
   285  		startedAt:    taskState.StartedAt,
   286  		exitResult:   &drivers.ExitResult{},
   287  		logger:       d.logger,
   288  		doneCh:       make(chan struct{}),
   289  	}
   290  
   291  	d.tasks.Set(taskState.TaskConfig.ID, h)
   292  
   293  	go h.run()
   294  	return nil
   295  }
   296  
   297  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   298  	if !d.config.Enabled {
   299  		return nil, nil, errDisabledDriver
   300  	}
   301  
   302  	if _, ok := d.tasks.Get(cfg.ID); ok {
   303  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   304  	}
   305  
   306  	var driverConfig TaskConfig
   307  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   308  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   309  	}
   310  
   311  	d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
   312  	handle := drivers.NewTaskHandle(taskHandleVersion)
   313  	handle.Config = cfg
   314  
   315  	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
   316  	executorConfig := &executor.ExecutorConfig{
   317  		LogFile:  pluginLogFile,
   318  		LogLevel: "debug",
   319  	}
   320  
   321  	logger := d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)
   322  	exec, pluginClient, err := executor.CreateExecutor(logger, d.nomadConfig, executorConfig)
   323  	if err != nil {
   324  		return nil, nil, fmt.Errorf("failed to create executor: %v", err)
   325  	}
   326  
   327  	// Only use cgroups when running as root on linux - Doing so in other cases
   328  	// will cause an error.
   329  	useCgroups := !d.config.NoCgroups && runtime.GOOS == "linux" && syscall.Geteuid() == 0
   330  
   331  	execCmd := &executor.ExecCommand{
   332  		Cmd:                driverConfig.Command,
   333  		Args:               driverConfig.Args,
   334  		Env:                cfg.EnvList(),
   335  		User:               cfg.User,
   336  		BasicProcessCgroup: useCgroups,
   337  		TaskDir:            cfg.TaskDir().Dir,
   338  		StdoutPath:         cfg.StdoutPath,
   339  		StderrPath:         cfg.StderrPath,
   340  		NetworkIsolation:   cfg.NetworkIsolation,
   341  	}
   342  
   343  	ps, err := exec.Launch(execCmd)
   344  	if err != nil {
   345  		pluginClient.Kill()
   346  		return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
   347  	}
   348  
   349  	h := &taskHandle{
   350  		exec:         exec,
   351  		pid:          ps.Pid,
   352  		pluginClient: pluginClient,
   353  		taskConfig:   cfg,
   354  		procState:    drivers.TaskStateRunning,
   355  		startedAt:    time.Now().Round(time.Millisecond),
   356  		logger:       d.logger,
   357  		doneCh:       make(chan struct{}),
   358  	}
   359  
   360  	driverState := TaskState{
   361  		ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
   362  		Pid:            ps.Pid,
   363  		TaskConfig:     cfg,
   364  		StartedAt:      h.startedAt,
   365  	}
   366  
   367  	if err := handle.SetDriverState(&driverState); err != nil {
   368  		d.logger.Error("failed to start task, error setting driver state", "error", err)
   369  		_ = exec.Shutdown("", 0)
   370  		pluginClient.Kill()
   371  		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
   372  	}
   373  
   374  	d.tasks.Set(cfg.ID, h)
   375  	go h.run()
   376  	return handle, nil, nil
   377  }
   378  
   379  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
   380  	handle, ok := d.tasks.Get(taskID)
   381  	if !ok {
   382  		return nil, drivers.ErrTaskNotFound
   383  	}
   384  
   385  	ch := make(chan *drivers.ExitResult)
   386  	go d.handleWait(ctx, handle, ch)
   387  
   388  	return ch, nil
   389  }
   390  
   391  func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
   392  	defer close(ch)
   393  	var result *drivers.ExitResult
   394  	ps, err := handle.exec.Wait(ctx)
   395  	if err != nil {
   396  		result = &drivers.ExitResult{
   397  			Err: fmt.Errorf("executor: error waiting on process: %v", err),
   398  		}
   399  	} else {
   400  		result = &drivers.ExitResult{
   401  			ExitCode: ps.ExitCode,
   402  			Signal:   ps.Signal,
   403  		}
   404  	}
   405  
   406  	select {
   407  	case <-ctx.Done():
   408  		return
   409  	case <-d.ctx.Done():
   410  		return
   411  	case ch <- result:
   412  	}
   413  }
   414  
   415  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
   416  	handle, ok := d.tasks.Get(taskID)
   417  	if !ok {
   418  		return drivers.ErrTaskNotFound
   419  	}
   420  
   421  	if err := handle.exec.Shutdown(signal, timeout); err != nil {
   422  		if handle.pluginClient.Exited() {
   423  			return nil
   424  		}
   425  		return fmt.Errorf("executor Shutdown failed: %v", err)
   426  	}
   427  
   428  	// Wait for handle to finish
   429  	<-handle.doneCh
   430  
   431  	// Kill executor
   432  	handle.pluginClient.Kill()
   433  
   434  	return nil
   435  }
   436  
   437  func (d *Driver) DestroyTask(taskID string, force bool) error {
   438  	handle, ok := d.tasks.Get(taskID)
   439  	if !ok {
   440  		return drivers.ErrTaskNotFound
   441  	}
   442  
   443  	if handle.IsRunning() && !force {
   444  		return fmt.Errorf("cannot destroy running task")
   445  	}
   446  
   447  	if !handle.pluginClient.Exited() {
   448  		if err := handle.exec.Shutdown("", 0); err != nil {
   449  			handle.logger.Error("destroying executor failed", "error", err)
   450  		}
   451  
   452  		handle.pluginClient.Kill()
   453  	}
   454  
   455  	d.tasks.Delete(taskID)
   456  	return nil
   457  }
   458  
   459  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
   460  	handle, ok := d.tasks.Get(taskID)
   461  	if !ok {
   462  		return nil, drivers.ErrTaskNotFound
   463  	}
   464  
   465  	return handle.TaskStatus(), nil
   466  }
   467  
   468  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
   469  	handle, ok := d.tasks.Get(taskID)
   470  	if !ok {
   471  		return nil, drivers.ErrTaskNotFound
   472  	}
   473  
   474  	return handle.exec.Stats(ctx, interval)
   475  }
   476  
   477  func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
   478  	return d.eventer.TaskEvents(ctx)
   479  }
   480  
   481  func (d *Driver) SignalTask(taskID string, signal string) error {
   482  	handle, ok := d.tasks.Get(taskID)
   483  	if !ok {
   484  		return drivers.ErrTaskNotFound
   485  	}
   486  
   487  	sig := os.Interrupt
   488  	if s, ok := signals.SignalLookup[signal]; ok {
   489  		sig = s
   490  	} else {
   491  		d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
   492  	}
   493  
   494  	return handle.exec.Signal(sig)
   495  }
   496  
   497  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
   498  	if len(cmd) == 0 {
   499  		return nil, fmt.Errorf("error cmd must have at least one value")
   500  	}
   501  	handle, ok := d.tasks.Get(taskID)
   502  	if !ok {
   503  		return nil, drivers.ErrTaskNotFound
   504  	}
   505  
   506  	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], cmd[1:])
   507  	if err != nil {
   508  		return nil, err
   509  	}
   510  
   511  	return &drivers.ExecTaskResult{
   512  		Stdout: out,
   513  		ExitResult: &drivers.ExitResult{
   514  			ExitCode: exitCode,
   515  		},
   516  	}, nil
   517  }
   518  
   519  var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil)
   520  
   521  func (d *Driver) ExecTaskStreamingRaw(ctx context.Context,
   522  	taskID string,
   523  	command []string,
   524  	tty bool,
   525  	stream drivers.ExecTaskStream) error {
   526  
   527  	if len(command) == 0 {
   528  		return fmt.Errorf("error cmd must have at least one value")
   529  	}
   530  	handle, ok := d.tasks.Get(taskID)
   531  	if !ok {
   532  		return drivers.ErrTaskNotFound
   533  	}
   534  
   535  	return handle.exec.ExecStreaming(ctx, command, tty, stream)
   536  }