github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/plugins/drivers/driver.go (about)

     1  package drivers
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5"
     6  	"fmt"
     7  	"io"
     8  	"path/filepath"
     9  	"sort"
    10  	"strconv"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/client/allocdir"
    14  	cstructs "github.com/hashicorp/nomad/client/structs"
    15  	"github.com/hashicorp/nomad/helper"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/plugins/base"
    18  	"github.com/hashicorp/nomad/plugins/drivers/proto"
    19  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    20  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    21  	"github.com/zclconf/go-cty/cty"
    22  	"github.com/zclconf/go-cty/cty/msgpack"
    23  )
    24  
const (
	// DriverHealthy is the default health description that should be used
	// if the driver is nominal.
	DriverHealthy = "Healthy"

	// Pre09TaskHandleVersion is the version used to identify that the task
	// handle is from a driver that existed before driver plugins (v0.9). The
	// driver should take appropriate action to handle the old driver state.
	Pre09TaskHandleVersion = 0
)
    35  
// DriverPlugin is the interface that drivers will implement. It is also
// implemented by a plugin client which proxies the calls to go-plugin. See
// the proto/driver.proto file for detailed information about each RPC and
// message structure.
type DriverPlugin interface {
	// Every driver is also a base plugin.
	base.BasePlugin

	// Driver-level description: the task configuration schema, the driver's
	// capabilities, and a channel of fingerprint results.
	TaskConfigSchema() (*hclspec.Spec, error)
	Capabilities() (*Capabilities, error)
	Fingerprint(context.Context) (<-chan *Fingerprint, error)

	// Task lifecycle and observation. Apart from RecoverTask and StartTask,
	// which take a handle or config, tasks are addressed by their task ID.
	RecoverTask(*TaskHandle) error
	StartTask(*TaskConfig) (*TaskHandle, *DriverNetwork, error)
	WaitTask(ctx context.Context, taskID string) (<-chan *ExitResult, error)
	StopTask(taskID string, timeout time.Duration, signal string) error
	DestroyTask(taskID string, force bool) error
	InspectTask(taskID string) (*TaskStatus, error)
	TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error)
	TaskEvents(context.Context) (<-chan *TaskEvent, error)

	// Drivers that do not support these RPCs can embed
	// DriverSignalTaskNotSupported and/or DriverExecTaskNotSupported to
	// satisfy the interface.
	SignalTask(taskID string, signal string) error
	ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error)
}
    59  
// ExecTaskStreamingDriver marks that a driver supports streaming exec task.
// This represents a user-friendly interface to implement, as an alternative
// to ExecTaskStreamingRawDriver, the low-level interface.
type ExecTaskStreamingDriver interface {
	// ExecTaskStreaming runs the command described by execOptions in the task
	// identified by taskID and returns its exit result.
	ExecTaskStreaming(ctx context.Context, taskID string, execOptions *ExecOptions) (*ExitResult, error)
}
    65  
// ExecOptions carries the parameters of a streaming exec request; it is the
// argument passed to ExecTaskStreamingDriver.ExecTaskStreaming.
type ExecOptions struct {
	// Command is the command to run.
	Command []string

	// Tty indicates whether a pseudo-terminal is to be allocated.
	Tty bool

	// Streams for the command's standard input, output and error.
	Stdin  io.ReadCloser
	Stdout io.WriteCloser
	Stderr io.WriteCloser

	// ResizeCh is the receive-only channel of terminal size values.
	ResizeCh <-chan TerminalSize
}
    81  
// DriverNetworkManager is the interface which exposes functions for creating
// a network namespace which tasks can join. This only needs to be implemented
// if the driver MUST create the network namespace.
type DriverNetworkManager interface {
	// CreateNetwork creates the network for the given allocation and returns
	// its isolation spec.
	// NOTE(review): the meaning of the bool result is not visible in this
	// file; presumably it reports whether the network was newly created —
	// confirm against implementations.
	CreateNetwork(allocID string) (*NetworkIsolationSpec, bool, error)

	// DestroyNetwork tears down the network described by spec for the given
	// allocation.
	DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error
}
    89  
    90  // DriverSignalTaskNotSupported can be embedded by drivers which don't support
    91  // the SignalTask RPC. This satisfies the SignalTask func requirement for the
    92  // DriverPlugin interface.
    93  type DriverSignalTaskNotSupported struct{}
    94  
    95  func (DriverSignalTaskNotSupported) SignalTask(taskID, signal string) error {
    96  	return fmt.Errorf("SignalTask is not supported by this driver")
    97  }
    98  
    99  // DriverExecTaskNotSupported can be embedded by drivers which don't support
   100  // the ExecTask RPC. This satisfies the ExecTask func requirement of the
   101  // DriverPlugin interface.
   102  type DriverExecTaskNotSupported struct{}
   103  
   104  func (_ DriverExecTaskNotSupported) ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) {
   105  	return nil, fmt.Errorf("ExecTask is not supported by this driver")
   106  }
   107  
   108  type HealthState string
   109  
   110  var (
   111  	HealthStateUndetected = HealthState("undetected")
   112  	HealthStateUnhealthy  = HealthState("unhealthy")
   113  	HealthStateHealthy    = HealthState("healthy")
   114  )
   115  
// Fingerprint is the value delivered on the channel returned by
// DriverPlugin.Fingerprint. It carries the driver's attributes together with
// its health state and a human-readable health description.
type Fingerprint struct {
	Attributes        map[string]*pstructs.Attribute
	Health            HealthState
	HealthDescription string

	// Err is set by the plugin if an error occurred during fingerprinting
	Err error
}
   124  
   125  // FSIsolation is an enumeration to describe what kind of filesystem isolation
   126  // a driver supports.
   127  type FSIsolation string
   128  
   129  var (
   130  	// FSIsolationNone means no isolation. The host filesystem is used.
   131  	FSIsolationNone = FSIsolation("none")
   132  
   133  	// FSIsolationChroot means the driver will use a chroot on the host
   134  	// filesystem.
   135  	FSIsolationChroot = FSIsolation("chroot")
   136  
   137  	// FSIsolationImage means the driver uses an image.
   138  	FSIsolationImage = FSIsolation("image")
   139  )
   140  
// Capabilities describes the optional features a driver supports. It is the
// value returned by DriverPlugin.Capabilities.
type Capabilities struct {
	// SendSignals marks the driver as being able to send signals
	SendSignals bool

	// Exec marks the driver as being able to execute arbitrary commands
	// such as health checks. Used by the ScriptExecutor interface.
	Exec bool

	// FSIsolation indicates what kind of filesystem isolation the driver supports.
	FSIsolation FSIsolation

	// NetIsolationModes lists the set of isolation modes supported by the driver
	NetIsolationModes []NetIsolationMode

	// MustInitiateNetwork tells Nomad that the driver must create the network
	// namespace and that the CreateNetwork and DestroyNetwork RPCs are implemented.
	MustInitiateNetwork bool

	// MountConfigs tells Nomad which mounting config options the driver supports.
	MountConfigs MountConfigSupport
}
   162  
   163  func (c *Capabilities) HasNetIsolationMode(m NetIsolationMode) bool {
   164  	for _, mode := range c.NetIsolationModes {
   165  		if mode == m {
   166  			return true
   167  		}
   168  	}
   169  	return false
   170  }
   171  
   172  type NetIsolationMode string
   173  
   174  var (
   175  	// NetIsolationModeHost disables network isolation and uses the host network
   176  	NetIsolationModeHost = NetIsolationMode("host")
   177  
   178  	// NetIsolationModeGroup uses the group network namespace for isolation
   179  	NetIsolationModeGroup = NetIsolationMode("group")
   180  
   181  	// NetIsolationModeTask isolates the network to just the task
   182  	NetIsolationModeTask = NetIsolationMode("task")
   183  
   184  	// NetIsolationModeNone indicates that there is no network to isolate and is
   185  	// intended to be used for tasks that the client manages remotely
   186  	NetIsolationModeNone = NetIsolationMode("none")
   187  )
   188  
// NetworkIsolationSpec describes an isolated network. It is returned by
// DriverNetworkManager.CreateNetwork, passed back to DestroyNetwork, and
// attached to a task through TaskConfig.NetworkIsolation.
//
// NOTE(review): Path presumably locates the network namespace and Labels is
// free-form metadata — neither is established by this file; confirm against
// the producers of this struct.
type NetworkIsolationSpec struct {
	Mode   NetIsolationMode
	Path   string
	Labels map[string]string
}
   194  
   195  // MountConfigSupport is an enum that defaults to "all" for backwards
   196  // compatibility with community drivers.
   197  type MountConfigSupport int32
   198  
   199  const (
   200  	MountConfigSupportAll MountConfigSupport = iota
   201  	MountConfigSupportNone
   202  )
   203  
// TerminalSize is the element type of ExecOptions.ResizeCh and describes the
// dimensions of a terminal.
// NOTE(review): units are presumably character rows and columns — confirm.
type TerminalSize struct {
	Height int
	Width  int
}
   208  
   209  type DNSConfig struct {
   210  	Servers  []string
   211  	Searches []string
   212  	Options  []string
   213  }
   214  
   215  func (c *DNSConfig) Copy() *DNSConfig {
   216  	if c == nil {
   217  		return nil
   218  	}
   219  
   220  	cfg := new(DNSConfig)
   221  	if len(c.Servers) > 0 {
   222  		cfg.Servers = make([]string, len(c.Servers))
   223  		copy(cfg.Servers, c.Servers)
   224  	}
   225  	if len(c.Searches) > 0 {
   226  		cfg.Searches = make([]string, len(c.Searches))
   227  		copy(cfg.Searches, c.Searches)
   228  	}
   229  	if len(c.Options) > 0 {
   230  		cfg.Options = make([]string, len(c.Options))
   231  		copy(cfg.Options, c.Options)
   232  	}
   233  
   234  	return cfg
   235  }
   236  
// TaskConfig is the description of a task handed to a driver; it is the
// argument of DriverPlugin.StartTask.
//
// The driver-specific configuration is held in the unexported
// rawDriverConfig field. It is written by EncodeDriverConfig and
// EncodeConcreteDriverConfig and read back with DecodeDriverConfig.
type TaskConfig struct {
	ID               string
	JobName          string
	TaskGroupName    string
	Name             string
	Env              map[string]string
	DeviceEnv        map[string]string
	Resources        *Resources
	Devices          []*DeviceConfig
	Mounts           []*MountConfig
	User             string
	AllocDir         string
	rawDriverConfig  []byte
	StdoutPath       string
	StderrPath       string
	AllocID          string
	NetworkIsolation *NetworkIsolationSpec
	DNS              *DNSConfig
}
   256  
   257  func (tc *TaskConfig) Copy() *TaskConfig {
   258  	if tc == nil {
   259  		return nil
   260  	}
   261  	c := new(TaskConfig)
   262  	*c = *tc
   263  	c.Env = helper.CopyMapStringString(c.Env)
   264  	c.DeviceEnv = helper.CopyMapStringString(c.DeviceEnv)
   265  	c.Resources = tc.Resources.Copy()
   266  	c.DNS = tc.DNS.Copy()
   267  
   268  	if c.Devices != nil {
   269  		dc := make([]*DeviceConfig, len(c.Devices))
   270  		for i, c := range c.Devices {
   271  			dc[i] = c.Copy()
   272  		}
   273  		c.Devices = dc
   274  	}
   275  
   276  	if c.Mounts != nil {
   277  		mc := make([]*MountConfig, len(c.Mounts))
   278  		for i, m := range c.Mounts {
   279  			mc[i] = m.Copy()
   280  		}
   281  		c.Mounts = mc
   282  	}
   283  
   284  	return c
   285  }
   286  
   287  func (tc *TaskConfig) EnvList() []string {
   288  	l := make([]string, 0, len(tc.Env))
   289  	for k, v := range tc.Env {
   290  		l = append(l, k+"="+v)
   291  	}
   292  
   293  	sort.Strings(l)
   294  	return l
   295  }
   296  
   297  func (tc *TaskConfig) TaskDir() *allocdir.TaskDir {
   298  	taskDir := filepath.Join(tc.AllocDir, tc.Name)
   299  	return &allocdir.TaskDir{
   300  		Dir:            taskDir,
   301  		SharedAllocDir: filepath.Join(tc.AllocDir, allocdir.SharedAllocName),
   302  		LogDir:         filepath.Join(tc.AllocDir, allocdir.SharedAllocName, allocdir.LogDirName),
   303  		SharedTaskDir:  filepath.Join(taskDir, allocdir.SharedAllocName),
   304  		LocalDir:       filepath.Join(taskDir, allocdir.TaskLocal),
   305  		SecretsDir:     filepath.Join(taskDir, allocdir.TaskSecrets),
   306  	}
   307  }
   308  
// DecodeDriverConfig decodes the stored raw driver configuration into t by
// passing it to base.MsgPackDecode, and returns that call's error.
func (tc *TaskConfig) DecodeDriverConfig(t interface{}) error {
	return base.MsgPackDecode(tc.rawDriverConfig, t)
}
   312  
   313  func (tc *TaskConfig) EncodeDriverConfig(val cty.Value) error {
   314  	data, err := msgpack.Marshal(val, val.Type())
   315  	if err != nil {
   316  		return err
   317  	}
   318  
   319  	tc.rawDriverConfig = data
   320  	return nil
   321  }
   322  
   323  func (tc *TaskConfig) EncodeConcreteDriverConfig(t interface{}) error {
   324  	data := []byte{}
   325  	err := base.MsgPackEncode(&data, t)
   326  	if err != nil {
   327  		return err
   328  	}
   329  
   330  	tc.rawDriverConfig = data
   331  	return nil
   332  }
   333  
// Resources groups the resource descriptions attached to a task through
// TaskConfig.Resources: the Nomad-level allocated resources, the Linux
// specific limits, and the allocated ports.
type Resources struct {
	NomadResources *structs.AllocatedTaskResources
	LinuxResources *LinuxResources
	Ports          *structs.AllocatedPorts
}
   339  
   340  func (r *Resources) Copy() *Resources {
   341  	if r == nil {
   342  		return nil
   343  	}
   344  	res := new(Resources)
   345  	if r.NomadResources != nil {
   346  		res.NomadResources = r.NomadResources.Copy()
   347  	}
   348  	if r.LinuxResources != nil {
   349  		res.LinuxResources = r.LinuxResources.Copy()
   350  	}
   351  
   352  	if r.Ports != nil {
   353  		ports := structs.AllocatedPorts(append(make([]structs.AllocatedPortMapping, 0, len(*r.Ports)), *r.Ports...))
   354  		res.Ports = &ports
   355  	}
   356  	return res
   357  }
   358  
   359  type LinuxResources struct {
   360  	CPUPeriod        int64
   361  	CPUQuota         int64
   362  	CPUShares        int64
   363  	MemoryLimitBytes int64
   364  	OOMScoreAdj      int64
   365  	CpusetCPUs       string
   366  	CpusetMems       string
   367  
   368  	// PrecentTicks is used to calculate the CPUQuota, currently the docker
   369  	// driver exposes cpu period and quota through the driver configuration
   370  	// and thus the calculation for CPUQuota cannot be done on the client.
   371  	// This is a capatability and should only be used by docker until the docker
   372  	// specific options are deprecated in favor of exposes CPUPeriod and
   373  	// CPUQuota at the task resource stanza.
   374  	PercentTicks float64
   375  }
   376  
   377  func (r *LinuxResources) Copy() *LinuxResources {
   378  	res := new(LinuxResources)
   379  	*res = *r
   380  	return res
   381  }
   382  
   383  type DeviceConfig struct {
   384  	TaskPath    string
   385  	HostPath    string
   386  	Permissions string
   387  }
   388  
   389  func (d *DeviceConfig) Copy() *DeviceConfig {
   390  	if d == nil {
   391  		return nil
   392  	}
   393  
   394  	dc := new(DeviceConfig)
   395  	*dc = *d
   396  	return dc
   397  }
   398  
   399  type MountConfig struct {
   400  	TaskPath        string
   401  	HostPath        string
   402  	Readonly        bool
   403  	PropagationMode string
   404  }
   405  
   406  func (m *MountConfig) IsEqual(o *MountConfig) bool {
   407  	return m.TaskPath == o.TaskPath &&
   408  		m.HostPath == o.HostPath &&
   409  		m.Readonly == o.Readonly &&
   410  		m.PropagationMode == o.PropagationMode
   411  }
   412  
   413  func (m *MountConfig) Copy() *MountConfig {
   414  	if m == nil {
   415  		return nil
   416  	}
   417  
   418  	mc := new(MountConfig)
   419  	*mc = *m
   420  	return mc
   421  }
   422  
   423  const (
   424  	TaskStateUnknown TaskState = "unknown"
   425  	TaskStateRunning TaskState = "running"
   426  	TaskStateExited  TaskState = "exited"
   427  )
   428  
   429  type TaskState string
   430  
   431  type ExitResult struct {
   432  	ExitCode  int
   433  	Signal    int
   434  	OOMKilled bool
   435  	Err       error
   436  }
   437  
   438  func (r *ExitResult) Successful() bool {
   439  	return r.ExitCode == 0 && r.Signal == 0 && r.Err == nil
   440  }
   441  
   442  func (r *ExitResult) Copy() *ExitResult {
   443  	if r == nil {
   444  		return nil
   445  	}
   446  	res := new(ExitResult)
   447  	*res = *r
   448  	return res
   449  }
   450  
// TaskStatus is the snapshot of a task returned by DriverPlugin.InspectTask:
// its identity, state, start/completion times, exit result, and
// driver-provided attributes and network override.
type TaskStatus struct {
	ID               string
	Name             string
	State            TaskState
	StartedAt        time.Time
	CompletedAt      time.Time
	ExitResult       *ExitResult
	DriverAttributes map[string]string
	NetworkOverride  *DriverNetwork
}
   461  
// TaskEvent is the value delivered on the channel returned by
// DriverPlugin.TaskEvents. It identifies the task and allocation it concerns
// and carries a timestamped message with optional annotations.
type TaskEvent struct {
	TaskID      string
	TaskName    string
	AllocID     string
	Timestamp   time.Time
	Message     string
	Annotations map[string]string

	// Err is only used if an error occurred while consuming the RPC stream
	Err error
}
   473  
// ExecTaskResult is the result type of DriverPlugin.ExecTask: the bytes
// written to stdout and stderr, and the exit result.
type ExecTaskResult struct {
	Stdout     []byte
	Stderr     []byte
	ExitResult *ExitResult
}
   479  
   480  // DriverNetwork is the network created by driver's (eg Docker's bridge
   481  // network) during Prestart.
   482  type DriverNetwork struct {
   483  	// PortMap can be set by drivers to replace ports in environment
   484  	// variables with driver-specific mappings.
   485  	PortMap map[string]int
   486  
   487  	// IP is the IP address for the task created by the driver.
   488  	IP string
   489  
   490  	// AutoAdvertise indicates whether the driver thinks services that
   491  	// choose to auto-advertise-addresses should use this IP instead of the
   492  	// host's. eg If a Docker network plugin is used
   493  	AutoAdvertise bool
   494  }
   495  
   496  // Advertise returns true if the driver suggests using the IP set. May be
   497  // called on a nil Network in which case it returns false.
   498  func (d *DriverNetwork) Advertise() bool {
   499  	return d != nil && d.AutoAdvertise
   500  }
   501  
   502  // Copy a DriverNetwork struct. If it is nil, nil is returned.
   503  func (d *DriverNetwork) Copy() *DriverNetwork {
   504  	if d == nil {
   505  		return nil
   506  	}
   507  	pm := make(map[string]int, len(d.PortMap))
   508  	for k, v := range d.PortMap {
   509  		pm[k] = v
   510  	}
   511  	return &DriverNetwork{
   512  		PortMap:       pm,
   513  		IP:            d.IP,
   514  		AutoAdvertise: d.AutoAdvertise,
   515  	}
   516  }
   517  
   518  // Hash the contents of a DriverNetwork struct to detect changes. If it is nil,
   519  // an empty slice is returned.
   520  func (d *DriverNetwork) Hash() []byte {
   521  	if d == nil {
   522  		return []byte{}
   523  	}
   524  	h := md5.New()
   525  	io.WriteString(h, d.IP)
   526  	io.WriteString(h, strconv.FormatBool(d.AutoAdvertise))
   527  	for k, v := range d.PortMap {
   528  		io.WriteString(h, k)
   529  		io.WriteString(h, strconv.Itoa(v))
   530  	}
   531  	return h.Sum(nil)
   532  }
   533  
// Helper types for operating on raw exec operations.
// We alias proto instances as much as possible to avoid conversion overhead.
   536  
// ExecTaskStreamingRawDriver represents a low-level interface for executing a
// streaming exec call, and is intended to be used when a driver instance is
// to delegate exec handling to another backend, e.g. to an executor or a
// driver behind a grpc/rpc protocol.
//
// Nomad client would prefer this interface method over `ExecTaskStreaming` if
// the driver implements it.
type ExecTaskStreamingRawDriver interface {
	// ExecTaskStreamingRaw runs command in the task identified by taskID,
	// optionally with a tty, exchanging exec messages over stream.
	ExecTaskStreamingRaw(
		ctx context.Context,
		taskID string,
		command []string,
		tty bool,
		stream ExecTaskStream) error
}
   550  
// ExecTaskStream represents a stream of exec streaming messages,
// and is a handle to get stdin and tty size and send back
// stdout/stderr and exit operations.
//
// The methods are not safe for concurrent use; callers must ensure that
// methods are called from at most one goroutine.
type ExecTaskStream interface {
	// Send relays a response message back to the API.
	//
	// The call is synchronous and no reference to the message is held: once
	// the method call completes, the message reference can be reused or freed.
	Send(*ExecTaskStreamingResponseMsg) error

	// Recv receives exec streaming messages from the API. It returns `io.EOF`
	// on completion of the stream.
	Recv() (*ExecTaskStreamingRequestMsg, error)
}
   567  
// ExecTaskStreamingRequestMsg is an alias of the proto request message; it is
// the message type returned by ExecTaskStream.Recv.
type ExecTaskStreamingRequestMsg = proto.ExecTaskStreamingRequest

// ExecTaskStreamingResponseMsg is an alias of the proto response message; it
// is the message type accepted by ExecTaskStream.Send.
type ExecTaskStreamingResponseMsg = proto.ExecTaskStreamingResponse
   570  
// InternalCapabilitiesDriver is an experimental interface enabling a driver
// to disable some nomad functionality (e.g. logs or metrics).
//
// Intended for internal drivers only while the interface is stabilized.
type InternalCapabilitiesDriver interface {
	// InternalCapabilities returns the set of functionality the driver
	// wants disabled.
	InternalCapabilities() InternalCapabilities
}
   578  
// InternalCapabilities flags disabled functionality.
// Zero value means all is supported.
type InternalCapabilities struct {
	// DisableLogCollection, when true, flags log collection as disabled.
	DisableLogCollection bool
	// DisableMetricsCollection, when true, flags metrics collection as
	// disabled.
	DisableMetricsCollection bool
}