github.com/janma/nomad@v0.11.3/drivers/exec/driver.go (about)

     1  package exec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/hashicorp/consul-template/signals"
    13  	hclog "github.com/hashicorp/go-hclog"
    14  	"github.com/hashicorp/nomad/client/fingerprint"
    15  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    16  	"github.com/hashicorp/nomad/drivers/shared/executor"
    17  	"github.com/hashicorp/nomad/helper"
    18  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    19  	"github.com/hashicorp/nomad/plugins/base"
    20  	"github.com/hashicorp/nomad/plugins/drivers"
    21  	"github.com/hashicorp/nomad/plugins/drivers/utils"
    22  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    23  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    24  )
    25  
    26  const (
    27  	// pluginName is the name of the plugin
    28  	pluginName = "exec"
    29  
    30  	// fingerprintPeriod is the interval at which the driver will send fingerprint responses
    31  	fingerprintPeriod = 30 * time.Second
    32  
    33  	// taskHandleVersion is the version of task handle which this driver sets
    34  	// and understands how to decode driver state
    35  	taskHandleVersion = 1
    36  )
    37  
    38  var (
    39  	// PluginID is the exec plugin metadata registered in the plugin
    40  	// catalog.
    41  	PluginID = loader.PluginID{
    42  		Name:       pluginName,
    43  		PluginType: base.PluginTypeDriver,
    44  	}
    45  
    46  	// PluginConfig is the exec driver factory function registered in the
    47  	// plugin catalog.
    48  	PluginConfig = &loader.InternalPluginConfig{
    49  		Config:  map[string]interface{}{},
    50  		Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewExecDriver(ctx, l) },
    51  	}
    52  
    53  	// pluginInfo is the response returned for the PluginInfo RPC
    54  	pluginInfo = &base.PluginInfoResponse{
    55  		Type:              base.PluginTypeDriver,
    56  		PluginApiVersions: []string{drivers.ApiVersion010},
    57  		PluginVersion:     "0.1.0",
    58  		Name:              pluginName,
    59  	}
    60  
    61  	// configSpec is the hcl specification returned by the ConfigSchema RPC
    62  	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    63  		"no_pivot_root": hclspec.NewDefault(
    64  			hclspec.NewAttr("no_pivot_root", "bool", false),
    65  			hclspec.NewLiteral("false"),
    66  		),
    67  	})
    68  
    69  	// taskConfigSpec is the hcl specification for the driver config section of
    70  	// a task within a job. It is returned in the TaskConfigSchema RPC
    71  	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    72  		"command": hclspec.NewAttr("command", "string", true),
    73  		"args":    hclspec.NewAttr("args", "list(string)", false),
    74  	})
    75  
    76  	// capabilities is returned by the Capabilities RPC and indicates what
    77  	// optional features this driver supports
    78  	capabilities = &drivers.Capabilities{
    79  		SendSignals: true,
    80  		Exec:        true,
    81  		FSIsolation: drivers.FSIsolationChroot,
    82  		NetIsolationModes: []drivers.NetIsolationMode{
    83  			drivers.NetIsolationModeHost,
    84  			drivers.NetIsolationModeGroup,
    85  		},
    86  		MountConfigs: drivers.MountConfigSupportAll,
    87  	}
    88  )
    89  
    90  // Driver fork/execs tasks using many of the underlying OS's isolation
    91  // features where configured.
    92  type Driver struct {
    93  	// eventer is used to handle multiplexing of TaskEvents calls such that an
    94  	// event can be broadcast to all callers
    95  	eventer *eventer.Eventer
    96  
    97  	// config is the driver configuration set by the SetConfig RPC
    98  	config Config
    99  
   100  	// nomadConfig is the client config from nomad
   101  	nomadConfig *base.ClientDriverConfig
   102  
   103  	// tasks is the in memory datastore mapping taskIDs to driverHandles
   104  	tasks *taskStore
   105  
   106  	// ctx is the context for the driver. It is passed to other subsystems to
   107  	// coordinate shutdown
   108  	ctx context.Context
   109  
   110  	// logger will log to the Nomad agent
   111  	logger hclog.Logger
   112  
   113  	// A tri-state boolean to know if the fingerprinting has happened and
   114  	// whether it has been successful
   115  	fingerprintSuccess *bool
   116  	fingerprintLock    sync.Mutex
   117  }
   118  
   119  // Config is the driver configuration set by the SetConfig RPC call
   120  type Config struct {
   121  	// NoPivotRoot disables the use of pivot_root, useful when the root partition
   122  	// is on ramdisk
   123  	NoPivotRoot bool `codec:"no_pivot_root"`
   124  }
   125  
   126  // TaskConfig is the driver configuration of a task within a job
   127  type TaskConfig struct {
   128  	Command string   `codec:"command"`
   129  	Args    []string `codec:"args"`
   130  }
   131  
   132  // TaskState is the state which is encoded in the handle returned in
   133  // StartTask. This information is needed to rebuild the task state and handler
   134  // during recovery.
   135  type TaskState struct {
   136  	ReattachConfig *pstructs.ReattachConfig
   137  	TaskConfig     *drivers.TaskConfig
   138  	Pid            int
   139  	StartedAt      time.Time
   140  }
   141  
   142  // NewExecDriver returns a new DrivePlugin implementation
   143  func NewExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin {
   144  	logger = logger.Named(pluginName)
   145  	return &Driver{
   146  		eventer: eventer.NewEventer(ctx, logger),
   147  		tasks:   newTaskStore(),
   148  		ctx:     ctx,
   149  		logger:  logger,
   150  	}
   151  }
   152  
   153  // setFingerprintSuccess marks the driver as having fingerprinted successfully
   154  func (d *Driver) setFingerprintSuccess() {
   155  	d.fingerprintLock.Lock()
   156  	d.fingerprintSuccess = helper.BoolToPtr(true)
   157  	d.fingerprintLock.Unlock()
   158  }
   159  
   160  // setFingerprintFailure marks the driver as having failed fingerprinting
   161  func (d *Driver) setFingerprintFailure() {
   162  	d.fingerprintLock.Lock()
   163  	d.fingerprintSuccess = helper.BoolToPtr(false)
   164  	d.fingerprintLock.Unlock()
   165  }
   166  
   167  // fingerprintSuccessful returns true if the driver has
   168  // never fingerprinted or has successfully fingerprinted
   169  func (d *Driver) fingerprintSuccessful() bool {
   170  	d.fingerprintLock.Lock()
   171  	defer d.fingerprintLock.Unlock()
   172  	return d.fingerprintSuccess == nil || *d.fingerprintSuccess
   173  }
   174  
   175  func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
   176  	return pluginInfo, nil
   177  }
   178  
   179  func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
   180  	return configSpec, nil
   181  }
   182  
   183  func (d *Driver) SetConfig(cfg *base.Config) error {
   184  	var config Config
   185  	if len(cfg.PluginConfig) != 0 {
   186  		if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
   187  			return err
   188  		}
   189  	}
   190  
   191  	d.config = config
   192  	if cfg != nil && cfg.AgentConfig != nil {
   193  		d.nomadConfig = cfg.AgentConfig.Driver
   194  	}
   195  	return nil
   196  }
   197  
   198  func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
   199  	return taskConfigSpec, nil
   200  }
   201  
   202  func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
   203  	return capabilities, nil
   204  }
   205  
   206  func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
   207  	ch := make(chan *drivers.Fingerprint)
   208  	go d.handleFingerprint(ctx, ch)
   209  	return ch, nil
   210  
   211  }
   212  func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
   213  	defer close(ch)
   214  	ticker := time.NewTimer(0)
   215  	for {
   216  		select {
   217  		case <-ctx.Done():
   218  			return
   219  		case <-d.ctx.Done():
   220  			return
   221  		case <-ticker.C:
   222  			ticker.Reset(fingerprintPeriod)
   223  			ch <- d.buildFingerprint()
   224  		}
   225  	}
   226  }
   227  
   228  func (d *Driver) buildFingerprint() *drivers.Fingerprint {
   229  	if runtime.GOOS != "linux" {
   230  		d.setFingerprintFailure()
   231  		return &drivers.Fingerprint{
   232  			Health:            drivers.HealthStateUndetected,
   233  			HealthDescription: "exec driver unsupported on client OS",
   234  		}
   235  	}
   236  
   237  	fp := &drivers.Fingerprint{
   238  		Attributes:        map[string]*pstructs.Attribute{},
   239  		Health:            drivers.HealthStateHealthy,
   240  		HealthDescription: drivers.DriverHealthy,
   241  	}
   242  
   243  	if !utils.IsUnixRoot() {
   244  		fp.Health = drivers.HealthStateUndetected
   245  		fp.HealthDescription = drivers.DriverRequiresRootMessage
   246  		d.setFingerprintFailure()
   247  		return fp
   248  	}
   249  
   250  	mount, err := fingerprint.FindCgroupMountpointDir()
   251  	if err != nil {
   252  		fp.Health = drivers.HealthStateUnhealthy
   253  		fp.HealthDescription = drivers.NoCgroupMountMessage
   254  		if d.fingerprintSuccessful() {
   255  			d.logger.Warn(fp.HealthDescription, "error", err)
   256  		}
   257  		d.setFingerprintFailure()
   258  		return fp
   259  	}
   260  
   261  	if mount == "" {
   262  		fp.Health = drivers.HealthStateUnhealthy
   263  		fp.HealthDescription = drivers.CgroupMountEmpty
   264  		d.setFingerprintFailure()
   265  		return fp
   266  	}
   267  
   268  	fp.Attributes["driver.exec"] = pstructs.NewBoolAttribute(true)
   269  	d.setFingerprintSuccess()
   270  	return fp
   271  }
   272  
   273  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   274  	if handle == nil {
   275  		return fmt.Errorf("handle cannot be nil")
   276  	}
   277  
   278  	// COMPAT(0.10): pre 0.9 upgrade path check
   279  	if handle.Version == 0 {
   280  		return d.recoverPre09Task(handle)
   281  	}
   282  
   283  	// If already attached to handle there's nothing to recover.
   284  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   285  		d.logger.Trace("nothing to recover; task already exists",
   286  			"task_id", handle.Config.ID,
   287  			"task_name", handle.Config.Name,
   288  		)
   289  		return nil
   290  	}
   291  
   292  	// Handle doesn't already exist, try to reattach
   293  	var taskState TaskState
   294  	if err := handle.GetDriverState(&taskState); err != nil {
   295  		d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID)
   296  		return fmt.Errorf("failed to decode task state from handle: %v", err)
   297  	}
   298  
   299  	// Create client for reattached executor
   300  	plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
   301  	if err != nil {
   302  		d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
   303  		return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
   304  	}
   305  
   306  	exec, pluginClient, err := executor.ReattachToExecutor(plugRC,
   307  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
   308  	if err != nil {
   309  		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
   310  		return fmt.Errorf("failed to reattach to executor: %v", err)
   311  	}
   312  
   313  	h := &taskHandle{
   314  		exec:         exec,
   315  		pid:          taskState.Pid,
   316  		pluginClient: pluginClient,
   317  		taskConfig:   taskState.TaskConfig,
   318  		procState:    drivers.TaskStateRunning,
   319  		startedAt:    taskState.StartedAt,
   320  		exitResult:   &drivers.ExitResult{},
   321  		logger:       d.logger,
   322  	}
   323  
   324  	d.tasks.Set(taskState.TaskConfig.ID, h)
   325  
   326  	go h.run()
   327  	return nil
   328  }
   329  
   330  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   331  	if _, ok := d.tasks.Get(cfg.ID); ok {
   332  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   333  	}
   334  
   335  	var driverConfig TaskConfig
   336  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   337  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   338  	}
   339  
   340  	d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
   341  	handle := drivers.NewTaskHandle(taskHandleVersion)
   342  	handle.Config = cfg
   343  
   344  	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
   345  	executorConfig := &executor.ExecutorConfig{
   346  		LogFile:     pluginLogFile,
   347  		LogLevel:    "debug",
   348  		FSIsolation: true,
   349  	}
   350  
   351  	exec, pluginClient, err := executor.CreateExecutor(
   352  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID),
   353  		d.nomadConfig, executorConfig)
   354  	if err != nil {
   355  		return nil, nil, fmt.Errorf("failed to create executor: %v", err)
   356  	}
   357  
   358  	user := cfg.User
   359  	if user == "" {
   360  		user = "nobody"
   361  	}
   362  
   363  	execCmd := &executor.ExecCommand{
   364  		Cmd:              driverConfig.Command,
   365  		Args:             driverConfig.Args,
   366  		Env:              cfg.EnvList(),
   367  		User:             user,
   368  		ResourceLimits:   true,
   369  		NoPivotRoot:      d.config.NoPivotRoot,
   370  		Resources:        cfg.Resources,
   371  		TaskDir:          cfg.TaskDir().Dir,
   372  		StdoutPath:       cfg.StdoutPath,
   373  		StderrPath:       cfg.StderrPath,
   374  		Mounts:           cfg.Mounts,
   375  		Devices:          cfg.Devices,
   376  		NetworkIsolation: cfg.NetworkIsolation,
   377  	}
   378  
   379  	ps, err := exec.Launch(execCmd)
   380  	if err != nil {
   381  		pluginClient.Kill()
   382  		return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
   383  	}
   384  
   385  	h := &taskHandle{
   386  		exec:         exec,
   387  		pid:          ps.Pid,
   388  		pluginClient: pluginClient,
   389  		taskConfig:   cfg,
   390  		procState:    drivers.TaskStateRunning,
   391  		startedAt:    time.Now().Round(time.Millisecond),
   392  		logger:       d.logger,
   393  	}
   394  
   395  	driverState := TaskState{
   396  		ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
   397  		Pid:            ps.Pid,
   398  		TaskConfig:     cfg,
   399  		StartedAt:      h.startedAt,
   400  	}
   401  
   402  	if err := handle.SetDriverState(&driverState); err != nil {
   403  		d.logger.Error("failed to start task, error setting driver state", "error", err)
   404  		exec.Shutdown("", 0)
   405  		pluginClient.Kill()
   406  		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
   407  	}
   408  
   409  	d.tasks.Set(cfg.ID, h)
   410  	go h.run()
   411  	return handle, nil, nil
   412  }
   413  
   414  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
   415  	handle, ok := d.tasks.Get(taskID)
   416  	if !ok {
   417  		return nil, drivers.ErrTaskNotFound
   418  	}
   419  
   420  	ch := make(chan *drivers.ExitResult)
   421  	go d.handleWait(ctx, handle, ch)
   422  
   423  	return ch, nil
   424  }
   425  
   426  func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
   427  	defer close(ch)
   428  	var result *drivers.ExitResult
   429  	ps, err := handle.exec.Wait(ctx)
   430  	if err != nil {
   431  		result = &drivers.ExitResult{
   432  			Err: fmt.Errorf("executor: error waiting on process: %v", err),
   433  		}
   434  	} else {
   435  		result = &drivers.ExitResult{
   436  			ExitCode: ps.ExitCode,
   437  			Signal:   ps.Signal,
   438  		}
   439  	}
   440  
   441  	select {
   442  	case <-ctx.Done():
   443  		return
   444  	case <-d.ctx.Done():
   445  		return
   446  	case ch <- result:
   447  	}
   448  }
   449  
   450  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
   451  	handle, ok := d.tasks.Get(taskID)
   452  	if !ok {
   453  		return drivers.ErrTaskNotFound
   454  	}
   455  
   456  	if err := handle.exec.Shutdown(signal, timeout); err != nil {
   457  		if handle.pluginClient.Exited() {
   458  			return nil
   459  		}
   460  		return fmt.Errorf("executor Shutdown failed: %v", err)
   461  	}
   462  
   463  	return nil
   464  }
   465  
   466  func (d *Driver) DestroyTask(taskID string, force bool) error {
   467  	handle, ok := d.tasks.Get(taskID)
   468  	if !ok {
   469  		return drivers.ErrTaskNotFound
   470  	}
   471  
   472  	if handle.IsRunning() && !force {
   473  		return fmt.Errorf("cannot destroy running task")
   474  	}
   475  
   476  	if !handle.pluginClient.Exited() {
   477  		if err := handle.exec.Shutdown("", 0); err != nil {
   478  			handle.logger.Error("destroying executor failed", "err", err)
   479  		}
   480  
   481  		handle.pluginClient.Kill()
   482  	}
   483  
   484  	d.tasks.Delete(taskID)
   485  	return nil
   486  }
   487  
   488  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
   489  	handle, ok := d.tasks.Get(taskID)
   490  	if !ok {
   491  		return nil, drivers.ErrTaskNotFound
   492  	}
   493  
   494  	return handle.TaskStatus(), nil
   495  }
   496  
   497  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
   498  	handle, ok := d.tasks.Get(taskID)
   499  	if !ok {
   500  		return nil, drivers.ErrTaskNotFound
   501  	}
   502  
   503  	return handle.exec.Stats(ctx, interval)
   504  }
   505  
   506  func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
   507  	return d.eventer.TaskEvents(ctx)
   508  }
   509  
   510  func (d *Driver) SignalTask(taskID string, signal string) error {
   511  	handle, ok := d.tasks.Get(taskID)
   512  	if !ok {
   513  		return drivers.ErrTaskNotFound
   514  	}
   515  
   516  	sig := os.Interrupt
   517  	if s, ok := signals.SignalLookup[signal]; ok {
   518  		sig = s
   519  	} else {
   520  		d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
   521  
   522  	}
   523  	return handle.exec.Signal(sig)
   524  }
   525  
   526  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
   527  	if len(cmd) == 0 {
   528  		return nil, fmt.Errorf("error cmd must have at least one value")
   529  	}
   530  	handle, ok := d.tasks.Get(taskID)
   531  	if !ok {
   532  		return nil, drivers.ErrTaskNotFound
   533  	}
   534  
   535  	args := []string{}
   536  	if len(cmd) > 1 {
   537  		args = cmd[1:]
   538  	}
   539  
   540  	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args)
   541  	if err != nil {
   542  		return nil, err
   543  	}
   544  
   545  	return &drivers.ExecTaskResult{
   546  		Stdout: out,
   547  		ExitResult: &drivers.ExitResult{
   548  			ExitCode: exitCode,
   549  		},
   550  	}, nil
   551  }
   552  
   553  var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil)
   554  
   555  func (d *Driver) ExecTaskStreamingRaw(ctx context.Context,
   556  	taskID string,
   557  	command []string,
   558  	tty bool,
   559  	stream drivers.ExecTaskStream) error {
   560  
   561  	if len(command) == 0 {
   562  		return fmt.Errorf("error cmd must have at least one value")
   563  	}
   564  	handle, ok := d.tasks.Get(taskID)
   565  	if !ok {
   566  		return drivers.ErrTaskNotFound
   567  	}
   568  
   569  	return handle.exec.ExecStreaming(ctx, command, tty, stream)
   570  }