github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/drivers/exec/driver.go (about)

     1  package exec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/hashicorp/consul-template/signals"
    13  	hclog "github.com/hashicorp/go-hclog"
    14  	"github.com/hashicorp/nomad/client/fingerprint"
    15  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    16  	"github.com/hashicorp/nomad/drivers/shared/executor"
    17  	"github.com/hashicorp/nomad/helper"
    18  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    19  	"github.com/hashicorp/nomad/plugins/base"
    20  	"github.com/hashicorp/nomad/plugins/drivers"
    21  	"github.com/hashicorp/nomad/plugins/drivers/utils"
    22  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    23  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    24  )
    25  
    26  const (
    27  	// pluginName is the name of the plugin
    28  	pluginName = "exec"
    29  
    30  	// fingerprintPeriod is the interval at which the driver will send fingerprint responses
    31  	fingerprintPeriod = 30 * time.Second
    32  
    33  	// taskHandleVersion is the version of task handle which this driver sets
    34  	// and understands how to decode driver state
    35  	taskHandleVersion = 1
    36  )
    37  
    38  var (
    39  	// PluginID is the exec plugin metadata registered in the plugin
    40  	// catalog.
    41  	PluginID = loader.PluginID{
    42  		Name:       pluginName,
    43  		PluginType: base.PluginTypeDriver,
    44  	}
    45  
    46  	// PluginConfig is the exec driver factory function registered in the
    47  	// plugin catalog.
    48  	PluginConfig = &loader.InternalPluginConfig{
    49  		Config:  map[string]interface{}{},
    50  		Factory: func(l hclog.Logger) interface{} { return NewExecDriver(l) },
    51  	}
    52  
    53  	// pluginInfo is the response returned for the PluginInfo RPC
    54  	pluginInfo = &base.PluginInfoResponse{
    55  		Type:              base.PluginTypeDriver,
    56  		PluginApiVersions: []string{drivers.ApiVersion010},
    57  		PluginVersion:     "0.1.0",
    58  		Name:              pluginName,
    59  	}
    60  
    61  	// configSpec is the hcl specification returned by the ConfigSchema RPC
    62  	configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    63  		"no_pivot_root": hclspec.NewDefault(
    64  			hclspec.NewAttr("no_pivot_root", "bool", false),
    65  			hclspec.NewLiteral("false"),
    66  		),
    67  	})
    68  
    69  	// taskConfigSpec is the hcl specification for the driver config section of
    70  	// a task within a job. It is returned in the TaskConfigSchema RPC
    71  	taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
    72  		"command": hclspec.NewAttr("command", "string", true),
    73  		"args":    hclspec.NewAttr("args", "list(string)", false),
    74  	})
    75  
    76  	// capabilities is returned by the Capabilities RPC and indicates what
    77  	// optional features this driver supports
    78  	capabilities = &drivers.Capabilities{
    79  		SendSignals: true,
    80  		Exec:        true,
    81  		FSIsolation: drivers.FSIsolationChroot,
    82  		NetIsolationModes: []drivers.NetIsolationMode{
    83  			drivers.NetIsolationModeHost,
    84  			drivers.NetIsolationModeGroup,
    85  		},
    86  	}
    87  )
    88  
// Driver is the exec task driver. It fork/execs tasks using many of the
// underlying OS's isolation features where configured.
type Driver struct {
	// eventer is used to handle multiplexing of TaskEvents calls such that an
	// event can be broadcast to all callers
	eventer *eventer.Eventer

	// config is the driver configuration set by the SetConfig RPC
	config Config

	// nomadConfig is the client config from nomad. It is only populated by
	// SetConfig when the agent config is present, so it may be nil.
	nomadConfig *base.ClientDriverConfig

	// tasks is the in memory datastore mapping taskIDs to driverHandles
	tasks *taskStore

	// ctx is the context for the driver. It is passed to other subsystems to
	// coordinate shutdown
	ctx context.Context

	// signalShutdown is called when the driver is shutting down and cancels the
	// ctx passed to any subsystems
	signalShutdown context.CancelFunc

	// logger will log to the Nomad agent
	logger hclog.Logger

	// fingerprintSuccess is a tri-state flag: nil means no fingerprint has
	// completed yet, true means the last fingerprint succeeded and false means
	// it failed. fingerprintLock guards every read and write of it.
	fingerprintSuccess *bool
	fingerprintLock    sync.Mutex
}
   121  
// Config is the driver configuration set by the SetConfig RPC call. It is
// decoded from the plugin configuration using the codec field tags.
type Config struct {
	// NoPivotRoot disables the use of pivot_root, useful when the root partition
	// is on ramdisk
	NoPivotRoot bool `codec:"no_pivot_root"`
}
   128  
// TaskConfig is the driver configuration of a task within a job. Its fields
// mirror the attributes declared in taskConfigSpec.
type TaskConfig struct {
	// Command is the command to run; the schema marks it as required.
	Command string   `codec:"command"`
	// Args is the optional list of arguments passed to Command.
	Args    []string `codec:"args"`
}
   134  
// TaskState is the state which is encoded in the handle returned in
// StartTask. This information is needed to rebuild the task state and handler
// during recovery.
type TaskState struct {
	// ReattachConfig describes how to reconnect to the executor plugin that
	// was started for the task.
	ReattachConfig *pstructs.ReattachConfig
	// TaskConfig is the task configuration the task was started with.
	TaskConfig     *drivers.TaskConfig
	// Pid is the process ID reported by the executor when the task launched.
	Pid            int
	// StartedAt is the time recorded when the task was started.
	StartedAt      time.Time
}
   144  
   145  // NewExecDriver returns a new DrivePlugin implementation
   146  func NewExecDriver(logger hclog.Logger) drivers.DriverPlugin {
   147  	ctx, cancel := context.WithCancel(context.Background())
   148  	logger = logger.Named(pluginName)
   149  	return &Driver{
   150  		eventer:        eventer.NewEventer(ctx, logger),
   151  		tasks:          newTaskStore(),
   152  		ctx:            ctx,
   153  		signalShutdown: cancel,
   154  		logger:         logger,
   155  	}
   156  }
   157  
   158  // setFingerprintSuccess marks the driver as having fingerprinted successfully
   159  func (d *Driver) setFingerprintSuccess() {
   160  	d.fingerprintLock.Lock()
   161  	d.fingerprintSuccess = helper.BoolToPtr(true)
   162  	d.fingerprintLock.Unlock()
   163  }
   164  
   165  // setFingerprintFailure marks the driver as having failed fingerprinting
   166  func (d *Driver) setFingerprintFailure() {
   167  	d.fingerprintLock.Lock()
   168  	d.fingerprintSuccess = helper.BoolToPtr(false)
   169  	d.fingerprintLock.Unlock()
   170  }
   171  
   172  // fingerprintSuccessful returns true if the driver has
   173  // never fingerprinted or has successfully fingerprinted
   174  func (d *Driver) fingerprintSuccessful() bool {
   175  	d.fingerprintLock.Lock()
   176  	defer d.fingerprintLock.Unlock()
   177  	return d.fingerprintSuccess == nil || *d.fingerprintSuccess
   178  }
   179  
// PluginInfo returns the static plugin metadata (type, supported API
// versions, plugin version and name) held in pluginInfo. It never returns an
// error.
func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) {
	return pluginInfo, nil
}
   183  
// ConfigSchema returns configSpec, the HCL specification of the driver's
// plugin configuration. It never returns an error.
func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
	return configSpec, nil
}
   187  
   188  func (d *Driver) SetConfig(cfg *base.Config) error {
   189  	var config Config
   190  	if len(cfg.PluginConfig) != 0 {
   191  		if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
   192  			return err
   193  		}
   194  	}
   195  
   196  	d.config = config
   197  	if cfg != nil && cfg.AgentConfig != nil {
   198  		d.nomadConfig = cfg.AgentConfig.Driver
   199  	}
   200  	return nil
   201  }
   202  
// Shutdown cancels the driver context by calling signalShutdown, which
// signals every subsystem watching d.ctx to stop.
func (d *Driver) Shutdown() {
	d.signalShutdown()
}
   206  
// TaskConfigSchema returns taskConfigSpec, the HCL specification of the
// driver config section of a task. It never returns an error.
func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
	return taskConfigSpec, nil
}
   210  
// Capabilities returns the package-level capabilities value describing the
// optional features this driver supports. It never returns an error.
func (d *Driver) Capabilities() (*drivers.Capabilities, error) {
	return capabilities, nil
}
   214  
   215  func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
   216  	ch := make(chan *drivers.Fingerprint)
   217  	go d.handleFingerprint(ctx, ch)
   218  	return ch, nil
   219  
   220  }
   221  func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
   222  	defer close(ch)
   223  	ticker := time.NewTimer(0)
   224  	for {
   225  		select {
   226  		case <-ctx.Done():
   227  			return
   228  		case <-d.ctx.Done():
   229  			return
   230  		case <-ticker.C:
   231  			ticker.Reset(fingerprintPeriod)
   232  			ch <- d.buildFingerprint()
   233  		}
   234  	}
   235  }
   236  
   237  func (d *Driver) buildFingerprint() *drivers.Fingerprint {
   238  	if runtime.GOOS != "linux" {
   239  		d.setFingerprintFailure()
   240  		return &drivers.Fingerprint{
   241  			Health:            drivers.HealthStateUndetected,
   242  			HealthDescription: "exec driver unsupported on client OS",
   243  		}
   244  	}
   245  
   246  	fp := &drivers.Fingerprint{
   247  		Attributes:        map[string]*pstructs.Attribute{},
   248  		Health:            drivers.HealthStateHealthy,
   249  		HealthDescription: drivers.DriverHealthy,
   250  	}
   251  
   252  	if !utils.IsUnixRoot() {
   253  		fp.Health = drivers.HealthStateUndetected
   254  		fp.HealthDescription = drivers.DriverRequiresRootMessage
   255  		d.setFingerprintFailure()
   256  		return fp
   257  	}
   258  
   259  	mount, err := fingerprint.FindCgroupMountpointDir()
   260  	if err != nil {
   261  		fp.Health = drivers.HealthStateUnhealthy
   262  		fp.HealthDescription = drivers.NoCgroupMountMessage
   263  		if d.fingerprintSuccessful() {
   264  			d.logger.Warn(fp.HealthDescription, "error", err)
   265  		}
   266  		d.setFingerprintFailure()
   267  		return fp
   268  	}
   269  
   270  	if mount == "" {
   271  		fp.Health = drivers.HealthStateUnhealthy
   272  		fp.HealthDescription = drivers.CgroupMountEmpty
   273  		d.setFingerprintFailure()
   274  		return fp
   275  	}
   276  
   277  	fp.Attributes["driver.exec"] = pstructs.NewBoolAttribute(true)
   278  	d.setFingerprintSuccess()
   279  	return fp
   280  }
   281  
   282  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   283  	if handle == nil {
   284  		return fmt.Errorf("handle cannot be nil")
   285  	}
   286  
   287  	// COMPAT(0.10): pre 0.9 upgrade path check
   288  	if handle.Version == 0 {
   289  		return d.recoverPre09Task(handle)
   290  	}
   291  
   292  	// If already attached to handle there's nothing to recover.
   293  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   294  		d.logger.Trace("nothing to recover; task already exists",
   295  			"task_id", handle.Config.ID,
   296  			"task_name", handle.Config.Name,
   297  		)
   298  		return nil
   299  	}
   300  
   301  	// Handle doesn't already exist, try to reattach
   302  	var taskState TaskState
   303  	if err := handle.GetDriverState(&taskState); err != nil {
   304  		d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID)
   305  		return fmt.Errorf("failed to decode task state from handle: %v", err)
   306  	}
   307  
   308  	// Create client for reattached executor
   309  	plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
   310  	if err != nil {
   311  		d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
   312  		return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
   313  	}
   314  
   315  	exec, pluginClient, err := executor.ReattachToExecutor(plugRC,
   316  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID))
   317  	if err != nil {
   318  		d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID)
   319  		return fmt.Errorf("failed to reattach to executor: %v", err)
   320  	}
   321  
   322  	h := &taskHandle{
   323  		exec:         exec,
   324  		pid:          taskState.Pid,
   325  		pluginClient: pluginClient,
   326  		taskConfig:   taskState.TaskConfig,
   327  		procState:    drivers.TaskStateRunning,
   328  		startedAt:    taskState.StartedAt,
   329  		exitResult:   &drivers.ExitResult{},
   330  		logger:       d.logger,
   331  	}
   332  
   333  	d.tasks.Set(taskState.TaskConfig.ID, h)
   334  
   335  	go h.run()
   336  	return nil
   337  }
   338  
   339  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   340  	if _, ok := d.tasks.Get(cfg.ID); ok {
   341  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   342  	}
   343  
   344  	var driverConfig TaskConfig
   345  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   346  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   347  	}
   348  
   349  	d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
   350  	handle := drivers.NewTaskHandle(taskHandleVersion)
   351  	handle.Config = cfg
   352  
   353  	pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out")
   354  	executorConfig := &executor.ExecutorConfig{
   355  		LogFile:     pluginLogFile,
   356  		LogLevel:    "debug",
   357  		FSIsolation: true,
   358  	}
   359  
   360  	exec, pluginClient, err := executor.CreateExecutor(
   361  		d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID),
   362  		d.nomadConfig, executorConfig)
   363  	if err != nil {
   364  		return nil, nil, fmt.Errorf("failed to create executor: %v", err)
   365  	}
   366  
   367  	user := cfg.User
   368  	if user == "" {
   369  		user = "nobody"
   370  	}
   371  
   372  	execCmd := &executor.ExecCommand{
   373  		Cmd:              driverConfig.Command,
   374  		Args:             driverConfig.Args,
   375  		Env:              cfg.EnvList(),
   376  		User:             user,
   377  		ResourceLimits:   true,
   378  		NoPivotRoot:      d.config.NoPivotRoot,
   379  		Resources:        cfg.Resources,
   380  		TaskDir:          cfg.TaskDir().Dir,
   381  		StdoutPath:       cfg.StdoutPath,
   382  		StderrPath:       cfg.StderrPath,
   383  		Mounts:           cfg.Mounts,
   384  		Devices:          cfg.Devices,
   385  		NetworkIsolation: cfg.NetworkIsolation,
   386  	}
   387  
   388  	ps, err := exec.Launch(execCmd)
   389  	if err != nil {
   390  		pluginClient.Kill()
   391  		return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
   392  	}
   393  
   394  	h := &taskHandle{
   395  		exec:         exec,
   396  		pid:          ps.Pid,
   397  		pluginClient: pluginClient,
   398  		taskConfig:   cfg,
   399  		procState:    drivers.TaskStateRunning,
   400  		startedAt:    time.Now().Round(time.Millisecond),
   401  		logger:       d.logger,
   402  	}
   403  
   404  	driverState := TaskState{
   405  		ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
   406  		Pid:            ps.Pid,
   407  		TaskConfig:     cfg,
   408  		StartedAt:      h.startedAt,
   409  	}
   410  
   411  	if err := handle.SetDriverState(&driverState); err != nil {
   412  		d.logger.Error("failed to start task, error setting driver state", "error", err)
   413  		exec.Shutdown("", 0)
   414  		pluginClient.Kill()
   415  		return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
   416  	}
   417  
   418  	d.tasks.Set(cfg.ID, h)
   419  	go h.run()
   420  	return handle, nil, nil
   421  }
   422  
   423  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
   424  	handle, ok := d.tasks.Get(taskID)
   425  	if !ok {
   426  		return nil, drivers.ErrTaskNotFound
   427  	}
   428  
   429  	ch := make(chan *drivers.ExitResult)
   430  	go d.handleWait(ctx, handle, ch)
   431  
   432  	return ch, nil
   433  }
   434  
   435  func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
   436  	defer close(ch)
   437  	var result *drivers.ExitResult
   438  	ps, err := handle.exec.Wait(ctx)
   439  	if err != nil {
   440  		result = &drivers.ExitResult{
   441  			Err: fmt.Errorf("executor: error waiting on process: %v", err),
   442  		}
   443  	} else {
   444  		result = &drivers.ExitResult{
   445  			ExitCode: ps.ExitCode,
   446  			Signal:   ps.Signal,
   447  		}
   448  	}
   449  
   450  	select {
   451  	case <-ctx.Done():
   452  		return
   453  	case <-d.ctx.Done():
   454  		return
   455  	case ch <- result:
   456  	}
   457  }
   458  
   459  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
   460  	handle, ok := d.tasks.Get(taskID)
   461  	if !ok {
   462  		return drivers.ErrTaskNotFound
   463  	}
   464  
   465  	if err := handle.exec.Shutdown(signal, timeout); err != nil {
   466  		if handle.pluginClient.Exited() {
   467  			return nil
   468  		}
   469  		return fmt.Errorf("executor Shutdown failed: %v", err)
   470  	}
   471  
   472  	return nil
   473  }
   474  
   475  func (d *Driver) DestroyTask(taskID string, force bool) error {
   476  	handle, ok := d.tasks.Get(taskID)
   477  	if !ok {
   478  		return drivers.ErrTaskNotFound
   479  	}
   480  
   481  	if handle.IsRunning() && !force {
   482  		return fmt.Errorf("cannot destroy running task")
   483  	}
   484  
   485  	if !handle.pluginClient.Exited() {
   486  		if err := handle.exec.Shutdown("", 0); err != nil {
   487  			handle.logger.Error("destroying executor failed", "err", err)
   488  		}
   489  
   490  		handle.pluginClient.Kill()
   491  	}
   492  
   493  	d.tasks.Delete(taskID)
   494  	return nil
   495  }
   496  
   497  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
   498  	handle, ok := d.tasks.Get(taskID)
   499  	if !ok {
   500  		return nil, drivers.ErrTaskNotFound
   501  	}
   502  
   503  	return handle.TaskStatus(), nil
   504  }
   505  
   506  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
   507  	handle, ok := d.tasks.Get(taskID)
   508  	if !ok {
   509  		return nil, drivers.ErrTaskNotFound
   510  	}
   511  
   512  	return handle.exec.Stats(ctx, interval)
   513  }
   514  
// TaskEvents returns a channel of task events by delegating to the driver's
// eventer with the supplied context.
func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
	return d.eventer.TaskEvents(ctx)
}
   518  
   519  func (d *Driver) SignalTask(taskID string, signal string) error {
   520  	handle, ok := d.tasks.Get(taskID)
   521  	if !ok {
   522  		return drivers.ErrTaskNotFound
   523  	}
   524  
   525  	sig := os.Interrupt
   526  	if s, ok := signals.SignalLookup[signal]; ok {
   527  		sig = s
   528  	} else {
   529  		d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
   530  
   531  	}
   532  	return handle.exec.Signal(sig)
   533  }
   534  
   535  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
   536  	if len(cmd) == 0 {
   537  		return nil, fmt.Errorf("error cmd must have at least one value")
   538  	}
   539  	handle, ok := d.tasks.Get(taskID)
   540  	if !ok {
   541  		return nil, drivers.ErrTaskNotFound
   542  	}
   543  
   544  	args := []string{}
   545  	if len(cmd) > 1 {
   546  		args = cmd[1:]
   547  	}
   548  
   549  	out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args)
   550  	if err != nil {
   551  		return nil, err
   552  	}
   553  
   554  	return &drivers.ExecTaskResult{
   555  		Stdout: out,
   556  		ExitResult: &drivers.ExitResult{
   557  			ExitCode: exitCode,
   558  		},
   559  	}, nil
   560  }
   561  
   562  var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil)
   563  
   564  func (d *Driver) ExecTaskStreamingRaw(ctx context.Context,
   565  	taskID string,
   566  	command []string,
   567  	tty bool,
   568  	stream drivers.ExecTaskStream) error {
   569  
   570  	if len(command) == 0 {
   571  		return fmt.Errorf("error cmd must have at least one value")
   572  	}
   573  	handle, ok := d.tasks.Get(taskID)
   574  	if !ok {
   575  		return drivers.ErrTaskNotFound
   576  	}
   577  
   578  	return handle.exec.ExecStreaming(ctx, command, tty, stream)
   579  }