github.com/rohankumardubey/nomad@v0.11.8/plugins/drivers/driver.go (about)

     1  package drivers
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5"
     6  	"fmt"
     7  	"io"
     8  	"path/filepath"
     9  	"sort"
    10  	"strconv"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/client/allocdir"
    14  	cstructs "github.com/hashicorp/nomad/client/structs"
    15  	"github.com/hashicorp/nomad/helper"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/plugins/base"
    18  	"github.com/hashicorp/nomad/plugins/drivers/proto"
    19  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    20  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    21  	"github.com/zclconf/go-cty/cty"
    22  	"github.com/zclconf/go-cty/cty/msgpack"
    23  )
    24  
    25  const (
    26  	// DriverHealthy is the default health description that should be used
    27  	// if the driver is nominal
    28  	DriverHealthy = "Healthy"
    29  
    30  	// Pre09TaskHandleVersion is the version used to identify that the task
    31  	// handle is from a driver that existed before driver plugins (v0.9). The
    32  	// driver should take appropriate action to handle the old driver state.
    33  	Pre09TaskHandleVersion = 0
    34  )
    35  
    36  // DriverPlugin is the interface with drivers will implement. It is also
    37  // implemented by a plugin client which proxies the calls to go-plugin. See
    38  // the proto/driver.proto file for detailed information about each RPC and
    39  // message structure.
    40  type DriverPlugin interface {
    41  	base.BasePlugin
    42  
    43  	TaskConfigSchema() (*hclspec.Spec, error)
    44  	Capabilities() (*Capabilities, error)
    45  	Fingerprint(context.Context) (<-chan *Fingerprint, error)
    46  
    47  	RecoverTask(*TaskHandle) error
    48  	StartTask(*TaskConfig) (*TaskHandle, *DriverNetwork, error)
    49  	WaitTask(ctx context.Context, taskID string) (<-chan *ExitResult, error)
    50  	StopTask(taskID string, timeout time.Duration, signal string) error
    51  	DestroyTask(taskID string, force bool) error
    52  	InspectTask(taskID string) (*TaskStatus, error)
    53  	TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error)
    54  	TaskEvents(context.Context) (<-chan *TaskEvent, error)
    55  
    56  	SignalTask(taskID string, signal string) error
    57  	ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error)
    58  }
    59  
    60  // ExecTaskStreamingDriver marks that a driver supports streaming exec task.  This represents a user friendly
    61  // interface to implement, as an alternative to the ExecTaskStreamingRawDriver, the low level interface.
    62  type ExecTaskStreamingDriver interface {
    63  	ExecTaskStreaming(ctx context.Context, taskID string, execOptions *ExecOptions) (*ExitResult, error)
    64  }
    65  
    66  type ExecOptions struct {
    67  	// Command is command to run
    68  	Command []string
    69  
    70  	// Tty indicates whether pseudo-terminal is to be allocated
    71  	Tty bool
    72  
    73  	// streams
    74  	Stdin  io.ReadCloser
    75  	Stdout io.WriteCloser
    76  	Stderr io.WriteCloser
    77  
    78  	// terminal size channel
    79  	ResizeCh <-chan TerminalSize
    80  }
    81  
    82  // DriverNetworkManager is the interface with exposes function for creating a
    83  // network namespace for which tasks can join. This only needs to be implemented
    84  // if the driver MUST create the network namespace
    85  type DriverNetworkManager interface {
    86  	CreateNetwork(allocID string) (*NetworkIsolationSpec, bool, error)
    87  	DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error
    88  }
    89  
    90  // DriverSignalTaskNotSupported can be embedded by drivers which don't support
    91  // the SignalTask RPC. This satisfies the SignalTask func requirement for the
    92  // DriverPlugin interface.
    93  type DriverSignalTaskNotSupported struct{}
    94  
    95  func (DriverSignalTaskNotSupported) SignalTask(taskID, signal string) error {
    96  	return fmt.Errorf("SignalTask is not supported by this driver")
    97  }
    98  
    99  // DriverExecTaskNotSupported can be embedded by drivers which don't support
   100  // the ExecTask RPC. This satisfies the ExecTask func requirement of the
   101  // DriverPlugin interface.
   102  type DriverExecTaskNotSupported struct{}
   103  
   104  func (_ DriverExecTaskNotSupported) ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) {
   105  	return nil, fmt.Errorf("ExecTask is not supported by this driver")
   106  }
   107  
   108  type HealthState string
   109  
   110  var (
   111  	HealthStateUndetected = HealthState("undetected")
   112  	HealthStateUnhealthy  = HealthState("unhealthy")
   113  	HealthStateHealthy    = HealthState("healthy")
   114  )
   115  
   116  type Fingerprint struct {
   117  	Attributes        map[string]*pstructs.Attribute
   118  	Health            HealthState
   119  	HealthDescription string
   120  
   121  	// Err is set by the plugin if an error occurred during fingerprinting
   122  	Err error
   123  }
   124  
   125  // FSIsolation is an enumeration to describe what kind of filesystem isolation
   126  // a driver supports.
   127  type FSIsolation string
   128  
   129  var (
   130  	// FSIsolationNone means no isolation. The host filesystem is used.
   131  	FSIsolationNone = FSIsolation("none")
   132  
   133  	// FSIsolationChroot means the driver will use a chroot on the host
   134  	// filesystem.
   135  	FSIsolationChroot = FSIsolation("chroot")
   136  
   137  	// FSIsolationImage means the driver uses an image.
   138  	FSIsolationImage = FSIsolation("image")
   139  )
   140  
   141  type Capabilities struct {
   142  	// SendSignals marks the driver as being able to send signals
   143  	SendSignals bool
   144  
   145  	// Exec marks the driver as being able to execute arbitrary commands
   146  	// such as health checks. Used by the ScriptExecutor interface.
   147  	Exec bool
   148  
   149  	//FSIsolation indicates what kind of filesystem isolation the driver supports.
   150  	FSIsolation FSIsolation
   151  
   152  	//NetIsolationModes lists the set of isolation modes supported by the driver
   153  	NetIsolationModes []NetIsolationMode
   154  
   155  	// MustInitiateNetwork tells Nomad that the driver must create the network
   156  	// namespace and that the CreateNetwork and DestroyNetwork RPCs are implemented.
   157  	MustInitiateNetwork bool
   158  
   159  	// MountConfigs tells Nomad which mounting config options the driver supports.
   160  	MountConfigs MountConfigSupport
   161  }
   162  
   163  func (c *Capabilities) HasNetIsolationMode(m NetIsolationMode) bool {
   164  	for _, mode := range c.NetIsolationModes {
   165  		if mode == m {
   166  			return true
   167  		}
   168  	}
   169  	return false
   170  }
   171  
   172  type NetIsolationMode string
   173  
   174  var (
   175  	// NetIsolationModeHost disables network isolation and uses the host network
   176  	NetIsolationModeHost = NetIsolationMode("host")
   177  
   178  	// NetIsolationModeGroup uses the group network namespace for isolation
   179  	NetIsolationModeGroup = NetIsolationMode("group")
   180  
   181  	// NetIsolationModeTask isolates the network to just the task
   182  	NetIsolationModeTask = NetIsolationMode("task")
   183  
   184  	// NetIsolationModeNone indicates that there is no network to isolate and is
   185  	// intended to be used for tasks that the client manages remotely
   186  	NetIsolationModeNone = NetIsolationMode("none")
   187  )
   188  
   189  type NetworkIsolationSpec struct {
   190  	Mode   NetIsolationMode
   191  	Path   string
   192  	Labels map[string]string
   193  }
   194  
   195  // MountConfigSupport is an enum that defaults to "all" for backwards
   196  // compatibility with community drivers.
   197  type MountConfigSupport int32
   198  
   199  const (
   200  	MountConfigSupportAll MountConfigSupport = iota
   201  	MountConfigSupportNone
   202  )
   203  
   204  type TerminalSize struct {
   205  	Height int
   206  	Width  int
   207  }
   208  
   209  type TaskConfig struct {
   210  	ID               string
   211  	JobName          string
   212  	TaskGroupName    string
   213  	Name             string
   214  	Env              map[string]string
   215  	DeviceEnv        map[string]string
   216  	Resources        *Resources
   217  	Devices          []*DeviceConfig
   218  	Mounts           []*MountConfig
   219  	User             string
   220  	AllocDir         string
   221  	rawDriverConfig  []byte
   222  	StdoutPath       string
   223  	StderrPath       string
   224  	AllocID          string
   225  	NetworkIsolation *NetworkIsolationSpec
   226  }
   227  
   228  func (tc *TaskConfig) Copy() *TaskConfig {
   229  	if tc == nil {
   230  		return nil
   231  	}
   232  	c := new(TaskConfig)
   233  	*c = *tc
   234  	c.Env = helper.CopyMapStringString(c.Env)
   235  	c.DeviceEnv = helper.CopyMapStringString(c.DeviceEnv)
   236  	c.Resources = tc.Resources.Copy()
   237  
   238  	if c.Devices != nil {
   239  		dc := make([]*DeviceConfig, len(c.Devices))
   240  		for i, c := range c.Devices {
   241  			dc[i] = c.Copy()
   242  		}
   243  		c.Devices = dc
   244  	}
   245  
   246  	if c.Mounts != nil {
   247  		mc := make([]*MountConfig, len(c.Mounts))
   248  		for i, m := range c.Mounts {
   249  			mc[i] = m.Copy()
   250  		}
   251  		c.Mounts = mc
   252  	}
   253  
   254  	return c
   255  }
   256  
   257  func (tc *TaskConfig) EnvList() []string {
   258  	l := make([]string, 0, len(tc.Env))
   259  	for k, v := range tc.Env {
   260  		l = append(l, k+"="+v)
   261  	}
   262  
   263  	sort.Strings(l)
   264  	return l
   265  }
   266  
   267  func (tc *TaskConfig) TaskDir() *allocdir.TaskDir {
   268  	taskDir := filepath.Join(tc.AllocDir, tc.Name)
   269  	return &allocdir.TaskDir{
   270  		Dir:            taskDir,
   271  		SharedAllocDir: filepath.Join(tc.AllocDir, allocdir.SharedAllocName),
   272  		LogDir:         filepath.Join(tc.AllocDir, allocdir.SharedAllocName, allocdir.LogDirName),
   273  		SharedTaskDir:  filepath.Join(taskDir, allocdir.SharedAllocName),
   274  		LocalDir:       filepath.Join(taskDir, allocdir.TaskLocal),
   275  		SecretsDir:     filepath.Join(taskDir, allocdir.TaskSecrets),
   276  	}
   277  }
   278  
   279  func (tc *TaskConfig) DecodeDriverConfig(t interface{}) error {
   280  	return base.MsgPackDecode(tc.rawDriverConfig, t)
   281  }
   282  
   283  func (tc *TaskConfig) EncodeDriverConfig(val cty.Value) error {
   284  	data, err := msgpack.Marshal(val, val.Type())
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	tc.rawDriverConfig = data
   290  	return nil
   291  }
   292  
   293  func (tc *TaskConfig) EncodeConcreteDriverConfig(t interface{}) error {
   294  	data := []byte{}
   295  	err := base.MsgPackEncode(&data, t)
   296  	if err != nil {
   297  		return err
   298  	}
   299  
   300  	tc.rawDriverConfig = data
   301  	return nil
   302  }
   303  
   304  type Resources struct {
   305  	NomadResources *structs.AllocatedTaskResources
   306  	LinuxResources *LinuxResources
   307  }
   308  
   309  func (r *Resources) Copy() *Resources {
   310  	if r == nil {
   311  		return nil
   312  	}
   313  	res := new(Resources)
   314  	if r.NomadResources != nil {
   315  		res.NomadResources = r.NomadResources.Copy()
   316  	}
   317  	if r.LinuxResources != nil {
   318  		res.LinuxResources = r.LinuxResources.Copy()
   319  	}
   320  	return res
   321  }
   322  
   323  type LinuxResources struct {
   324  	CPUPeriod        int64
   325  	CPUQuota         int64
   326  	CPUShares        int64
   327  	MemoryLimitBytes int64
   328  	OOMScoreAdj      int64
   329  	CpusetCPUs       string
   330  	CpusetMems       string
   331  
   332  	// PrecentTicks is used to calculate the CPUQuota, currently the docker
   333  	// driver exposes cpu period and quota through the driver configuration
   334  	// and thus the calculation for CPUQuota cannot be done on the client.
   335  	// This is a capatability and should only be used by docker until the docker
   336  	// specific options are deprecated in favor of exposes CPUPeriod and
   337  	// CPUQuota at the task resource stanza.
   338  	PercentTicks float64
   339  }
   340  
   341  func (r *LinuxResources) Copy() *LinuxResources {
   342  	res := new(LinuxResources)
   343  	*res = *r
   344  	return res
   345  }
   346  
   347  type DeviceConfig struct {
   348  	TaskPath    string
   349  	HostPath    string
   350  	Permissions string
   351  }
   352  
   353  func (d *DeviceConfig) Copy() *DeviceConfig {
   354  	if d == nil {
   355  		return nil
   356  	}
   357  
   358  	dc := new(DeviceConfig)
   359  	*dc = *d
   360  	return dc
   361  }
   362  
   363  type MountConfig struct {
   364  	TaskPath        string
   365  	HostPath        string
   366  	Readonly        bool
   367  	PropagationMode string
   368  }
   369  
   370  func (m *MountConfig) IsEqual(o *MountConfig) bool {
   371  	return m.TaskPath == o.TaskPath &&
   372  		m.HostPath == o.HostPath &&
   373  		m.Readonly == o.Readonly &&
   374  		m.PropagationMode == o.PropagationMode
   375  }
   376  
   377  func (m *MountConfig) Copy() *MountConfig {
   378  	if m == nil {
   379  		return nil
   380  	}
   381  
   382  	mc := new(MountConfig)
   383  	*mc = *m
   384  	return mc
   385  }
   386  
   387  const (
   388  	TaskStateUnknown TaskState = "unknown"
   389  	TaskStateRunning TaskState = "running"
   390  	TaskStateExited  TaskState = "exited"
   391  )
   392  
   393  type TaskState string
   394  
   395  type ExitResult struct {
   396  	ExitCode  int
   397  	Signal    int
   398  	OOMKilled bool
   399  	Err       error
   400  }
   401  
   402  func (r *ExitResult) Successful() bool {
   403  	return r.ExitCode == 0 && r.Signal == 0 && r.Err == nil
   404  }
   405  
   406  func (r *ExitResult) Copy() *ExitResult {
   407  	if r == nil {
   408  		return nil
   409  	}
   410  	res := new(ExitResult)
   411  	*res = *r
   412  	return res
   413  }
   414  
   415  type TaskStatus struct {
   416  	ID               string
   417  	Name             string
   418  	State            TaskState
   419  	StartedAt        time.Time
   420  	CompletedAt      time.Time
   421  	ExitResult       *ExitResult
   422  	DriverAttributes map[string]string
   423  	NetworkOverride  *DriverNetwork
   424  }
   425  
   426  type TaskEvent struct {
   427  	TaskID      string
   428  	TaskName    string
   429  	AllocID     string
   430  	Timestamp   time.Time
   431  	Message     string
   432  	Annotations map[string]string
   433  
   434  	// Err is only used if an error occurred while consuming the RPC stream
   435  	Err error
   436  }
   437  
   438  type ExecTaskResult struct {
   439  	Stdout     []byte
   440  	Stderr     []byte
   441  	ExitResult *ExitResult
   442  }
   443  
   444  // DriverNetwork is the network created by driver's (eg Docker's bridge
   445  // network) during Prestart.
   446  type DriverNetwork struct {
   447  	// PortMap can be set by drivers to replace ports in environment
   448  	// variables with driver-specific mappings.
   449  	PortMap map[string]int
   450  
   451  	// IP is the IP address for the task created by the driver.
   452  	IP string
   453  
   454  	// AutoAdvertise indicates whether the driver thinks services that
   455  	// choose to auto-advertise-addresses should use this IP instead of the
   456  	// host's. eg If a Docker network plugin is used
   457  	AutoAdvertise bool
   458  }
   459  
   460  // Advertise returns true if the driver suggests using the IP set. May be
   461  // called on a nil Network in which case it returns false.
   462  func (d *DriverNetwork) Advertise() bool {
   463  	return d != nil && d.AutoAdvertise
   464  }
   465  
   466  // Copy a DriverNetwork struct. If it is nil, nil is returned.
   467  func (d *DriverNetwork) Copy() *DriverNetwork {
   468  	if d == nil {
   469  		return nil
   470  	}
   471  	pm := make(map[string]int, len(d.PortMap))
   472  	for k, v := range d.PortMap {
   473  		pm[k] = v
   474  	}
   475  	return &DriverNetwork{
   476  		PortMap:       pm,
   477  		IP:            d.IP,
   478  		AutoAdvertise: d.AutoAdvertise,
   479  	}
   480  }
   481  
   482  // Hash the contents of a DriverNetwork struct to detect changes. If it is nil,
   483  // an empty slice is returned.
   484  func (d *DriverNetwork) Hash() []byte {
   485  	if d == nil {
   486  		return []byte{}
   487  	}
   488  	h := md5.New()
   489  	io.WriteString(h, d.IP)
   490  	io.WriteString(h, strconv.FormatBool(d.AutoAdvertise))
   491  	for k, v := range d.PortMap {
   492  		io.WriteString(h, k)
   493  		io.WriteString(h, strconv.Itoa(v))
   494  	}
   495  	return h.Sum(nil)
   496  }
   497  
   498  // Helper types for operating on raw exec operations.
   499  // We alias proto instances as much as possible to avoid conversion overhead.
   500  
   501  // ExecTaskStreamingRawDriver represents a low-level interface for executing a streaming exec
   502  // call, and is intended to be used when driver instance is to delegate exec handling to another
   503  // backend, e.g. to a executor or a driver behind a grpc/rpc protocol
   504  //
   505  // Nomad client would prefer this interface method over `ExecTaskStreaming` if driver implements it.
   506  type ExecTaskStreamingRawDriver interface {
   507  	ExecTaskStreamingRaw(
   508  		ctx context.Context,
   509  		taskID string,
   510  		command []string,
   511  		tty bool,
   512  		stream ExecTaskStream) error
   513  }
   514  
   515  // ExecTaskStream represents a stream of exec streaming messages,
   516  // and is a handle to get stdin and tty size and send back
   517  // stdout/stderr and exit operations.
   518  //
   519  // The methods are not concurrent safe; callers must ensure that methods are called
   520  // from at most one goroutine.
   521  type ExecTaskStream interface {
   522  	// Send relays response message back to API.
   523  	//
   524  	// The call is synchronous and no references to message is held: once
   525  	// method call completes, the message reference can be reused or freed.
   526  	Send(*ExecTaskStreamingResponseMsg) error
   527  
   528  	// Receive exec streaming messages from API.  Returns `io.EOF` on completion of stream.
   529  	Recv() (*ExecTaskStreamingRequestMsg, error)
   530  }
   531  
   532  type ExecTaskStreamingRequestMsg = proto.ExecTaskStreamingRequest
   533  type ExecTaskStreamingResponseMsg = proto.ExecTaskStreamingResponse
   534  
   535  // InternalCapabilitiesDriver is an experimental interface enabling a driver
   536  // to disable some nomad functionality (e.g. logs or metrics).
   537  //
   538  // Intended for internal drivers only while the interface is stabalized.
   539  type InternalCapabilitiesDriver interface {
   540  	InternalCapabilities() InternalCapabilities
   541  }
   542  
   543  // InternalCapabilities flags disabled functionality.
   544  // Zero value means all is supported.
   545  type InternalCapabilities struct {
   546  	DisableLogCollection     bool
   547  	DisableMetricsCollection bool
   548  }