github.com/manicqin/nomad@v0.9.5/plugins/drivers/driver.go (about)

     1  package drivers
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5"
     6  	"fmt"
     7  	"io"
     8  	"path/filepath"
     9  	"sort"
    10  	"strconv"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/client/allocdir"
    14  	cstructs "github.com/hashicorp/nomad/client/structs"
    15  	"github.com/hashicorp/nomad/helper"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/plugins/base"
    18  	"github.com/hashicorp/nomad/plugins/drivers/proto"
    19  	"github.com/hashicorp/nomad/plugins/shared/hclspec"
    20  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    21  	"github.com/zclconf/go-cty/cty"
    22  	"github.com/zclconf/go-cty/cty/msgpack"
    23  )
    24  
// Constants for the default driver health text and for task handle
// versioning.
const (
	// DriverHealthy is the default health description that should be used
	// if the driver is nominal.
	DriverHealthy = "Healthy"

	// Pre09TaskHandleVersion is the version used to identify that the task
	// handle is from a driver that existed before driver plugins (v0.9). The
	// driver should take appropriate action to handle the old driver state.
	Pre09TaskHandleVersion = 0
)
    35  
// DriverPlugin is the interface which drivers will implement. It is also
// implemented by a plugin client which proxies the calls to go-plugin. See
// the proto/driver.proto file for detailed information about each RPC and
// message structure.
type DriverPlugin interface {
	base.BasePlugin

	// Driver-level calls: the schema for task configuration, the driver's
	// Capabilities, and a receive-only channel of Fingerprint results.
	TaskConfigSchema() (*hclspec.Spec, error)
	Capabilities() (*Capabilities, error)
	Fingerprint(context.Context) (<-chan *Fingerprint, error)

	// Task lifecycle calls. RecoverTask takes an existing TaskHandle and
	// StartTask takes a TaskConfig; the remaining calls address a task by
	// its taskID. WaitTask, TaskStats and TaskEvents hand back receive-only
	// channels rather than a single value.
	RecoverTask(*TaskHandle) error
	StartTask(*TaskConfig) (*TaskHandle, *DriverNetwork, error)
	WaitTask(ctx context.Context, taskID string) (<-chan *ExitResult, error)
	StopTask(taskID string, timeout time.Duration, signal string) error
	DestroyTask(taskID string, force bool) error
	InspectTask(taskID string) (*TaskStatus, error)
	TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error)
	TaskEvents(context.Context) (<-chan *TaskEvent, error)

	// Signal and exec calls. Drivers that do not support them can embed
	// DriverSignalTaskNotSupported and/or DriverExecTaskNotSupported to
	// satisfy these two methods.
	SignalTask(taskID string, signal string) error
	ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error)
}
    59  
// ExecTaskStreamingDriver marks that a driver supports streaming exec task.
// This represents a user friendly interface to implement, as an alternative
// to the ExecTaskStreamingRawDriver, the low level interface.
type ExecTaskStreamingDriver interface {
	// ExecTaskStreaming receives the task ID plus an ExecOptions value
	// (command, tty flag, stdio streams and resize channel) and returns an
	// ExitResult or an error.
	ExecTaskStreaming(ctx context.Context, taskID string, execOptions *ExecOptions) (*ExitResult, error)
}
    65  
// ExecOptions carries the parameters passed to
// ExecTaskStreamingDriver.ExecTaskStreaming.
type ExecOptions struct {
	// Command is the command to run
	Command []string

	// Tty indicates whether a pseudo-terminal is to be allocated
	Tty bool

	// Stdin, Stdout and Stderr are the standard streams for the command.
	Stdin  io.ReadCloser
	Stdout io.WriteCloser
	Stderr io.WriteCloser

	// ResizeCh is a receive-only channel of terminal sizes.
	ResizeCh <-chan TerminalSize
}
    81  
// DriverNetworkManager is the interface which exposes functions for creating
// a network namespace which tasks can join. This only needs to be implemented
// if the driver MUST create the network namespace.
type DriverNetworkManager interface {
	// CreateNetwork takes an allocation ID and returns a
	// NetworkIsolationSpec along with a bool and an error.
	// NOTE(review): the meaning of the bool result is not stated in this
	// file; confirm against the implementations.
	CreateNetwork(allocID string) (*NetworkIsolationSpec, bool, error)
	// DestroyNetwork takes the allocation ID together with a
	// NetworkIsolationSpec, presumably the one CreateNetwork returned.
	DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error
}
    89  
// InternalDriverPlugin is an interface that exposes functions that are only
// implemented by internal driver plugins.
type InternalDriverPlugin interface {
	// Shutdown allows the plugin to clean up any running state to avoid
	// leaking resources. It should not block. It takes no arguments and
	// returns nothing, so it has no way to report a failure to the caller.
	Shutdown()
}
    97  
    98  // DriverSignalTaskNotSupported can be embedded by drivers which don't support
    99  // the SignalTask RPC. This satisfies the SignalTask func requirement for the
   100  // DriverPlugin interface.
   101  type DriverSignalTaskNotSupported struct{}
   102  
   103  func (DriverSignalTaskNotSupported) SignalTask(taskID, signal string) error {
   104  	return fmt.Errorf("SignalTask is not supported by this driver")
   105  }
   106  
   107  // DriverExecTaskNotSupported can be embedded by drivers which don't support
   108  // the ExecTask RPC. This satisfies the ExecTask func requirement of the
   109  // DriverPlugin interface.
   110  type DriverExecTaskNotSupported struct{}
   111  
   112  func (_ DriverExecTaskNotSupported) ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) {
   113  	return nil, fmt.Errorf("ExecTask is not supported by this driver")
   114  }
   115  
// HealthState is a string label for a driver's health; it is the type of the
// Health field of Fingerprint.
type HealthState string

// The health states defined by this package: "undetected", "unhealthy" and
// "healthy".
var (
	HealthStateUndetected = HealthState("undetected")
	HealthStateUnhealthy  = HealthState("unhealthy")
	HealthStateHealthy    = HealthState("healthy")
)
   123  
// Fingerprint is the element type of the channel returned by
// DriverPlugin.Fingerprint. It carries a set of named attributes, a
// HealthState with a free-form description, and an optional error.
type Fingerprint struct {
	Attributes        map[string]*pstructs.Attribute
	Health            HealthState
	HealthDescription string

	// Err is set by the plugin if an error occurred during fingerprinting
	Err error
}
   132  
// FSIsolation is an enumeration to describe what kind of filesystem isolation
// a driver supports. It is reported through the FSIsolation field of
// Capabilities.
type FSIsolation string

var (
	// FSIsolationNone means no isolation. The host filesystem is used.
	FSIsolationNone = FSIsolation("none")

	// FSIsolationChroot means the driver will use a chroot on the host
	// filesystem.
	FSIsolationChroot = FSIsolation("chroot")

	// FSIsolationImage means the driver uses an image.
	FSIsolationImage = FSIsolation("image")
)
   148  
// Capabilities describes the features a driver supports. It is the value
// returned by DriverPlugin.Capabilities.
type Capabilities struct {
	// SendSignals marks the driver as being able to send signals
	SendSignals bool

	// Exec marks the driver as being able to execute arbitrary commands
	// such as health checks. Used by the ScriptExecutor interface.
	Exec bool

	// FSIsolation indicates what kind of filesystem isolation the driver supports.
	FSIsolation FSIsolation

	// NetIsolationModes lists the set of isolation modes supported by the
	// driver. HasNetIsolationMode searches this list.
	NetIsolationModes []NetIsolationMode

	// MustInitiateNetwork tells Nomad that the driver must create the network
	// namespace and that the CreateNetwork and DestroyNetwork RPCs are implemented.
	MustInitiateNetwork bool
}
   167  
   168  func (c *Capabilities) HasNetIsolationMode(m NetIsolationMode) bool {
   169  	for _, mode := range c.NetIsolationModes {
   170  		if mode == m {
   171  			return true
   172  		}
   173  	}
   174  	return false
   175  }
   176  
// NetIsolationMode names a kind of network isolation. It is used both in
// Capabilities.NetIsolationModes and in NetworkIsolationSpec.Mode.
type NetIsolationMode string

var (
	// NetIsolationModeHost disables network isolation and uses the host network
	NetIsolationModeHost = NetIsolationMode("host")

	// NetIsolationModeGroup uses the group network namespace for isolation
	NetIsolationModeGroup = NetIsolationMode("group")

	// NetIsolationModeTask isolates the network to just the task
	NetIsolationModeTask = NetIsolationMode("task")

	// NetIsolationModeNone indicates that there is no network to isolate and is
	// intended to be used for tasks that the client manages remotely
	NetIsolationModeNone = NetIsolationMode("none")
)
   193  
// NetworkIsolationSpec describes an isolated network. It is returned by
// DriverNetworkManager.CreateNetwork, passed back to DestroyNetwork, and
// attached to a task through TaskConfig.NetworkIsolation.
//
// Mode is the isolation mode and Labels is a free-form string map.
// NOTE(review): Path is presumably the filesystem path of the network
// namespace; confirm against the code that populates it.
type NetworkIsolationSpec struct {
	Mode   NetIsolationMode
	Path   string
	Labels map[string]string
}
   199  
// TerminalSize holds the height and width of a terminal. It is the element
// type of ExecOptions.ResizeCh.
// NOTE(review): units are presumably character rows and columns; confirm.
type TerminalSize struct {
	Height int
	Width  int
}
   204  
// TaskConfig is the task description handed to DriverPlugin.StartTask.
//
// The driver-specific configuration lives in the unexported rawDriverConfig
// field as encoded bytes: EncodeDriverConfig and EncodeConcreteDriverConfig
// write it and DecodeDriverConfig reads it back. AllocDir and Name are joined
// by TaskDir to derive the task's directory layout, and Env is rendered as
// sorted KEY=VALUE strings by EnvList.
type TaskConfig struct {
	ID               string
	JobName          string
	TaskGroupName    string
	Name             string
	Env              map[string]string
	DeviceEnv        map[string]string
	Resources        *Resources
	Devices          []*DeviceConfig
	Mounts           []*MountConfig
	User             string
	AllocDir         string
	rawDriverConfig  []byte
	StdoutPath       string
	StderrPath       string
	AllocID          string
	NetworkIsolation *NetworkIsolationSpec
}
   223  
   224  func (tc *TaskConfig) Copy() *TaskConfig {
   225  	if tc == nil {
   226  		return nil
   227  	}
   228  	c := new(TaskConfig)
   229  	*c = *tc
   230  	c.Env = helper.CopyMapStringString(c.Env)
   231  	c.DeviceEnv = helper.CopyMapStringString(c.DeviceEnv)
   232  	c.Resources = tc.Resources.Copy()
   233  
   234  	if c.Devices != nil {
   235  		dc := make([]*DeviceConfig, len(c.Devices))
   236  		for i, c := range c.Devices {
   237  			dc[i] = c.Copy()
   238  		}
   239  		c.Devices = dc
   240  	}
   241  
   242  	if c.Mounts != nil {
   243  		mc := make([]*MountConfig, len(c.Mounts))
   244  		for i, m := range c.Mounts {
   245  			mc[i] = m.Copy()
   246  		}
   247  		c.Mounts = mc
   248  	}
   249  
   250  	return c
   251  }
   252  
   253  func (tc *TaskConfig) EnvList() []string {
   254  	l := make([]string, 0, len(tc.Env))
   255  	for k, v := range tc.Env {
   256  		l = append(l, k+"="+v)
   257  	}
   258  
   259  	sort.Strings(l)
   260  	return l
   261  }
   262  
   263  func (tc *TaskConfig) TaskDir() *allocdir.TaskDir {
   264  	taskDir := filepath.Join(tc.AllocDir, tc.Name)
   265  	return &allocdir.TaskDir{
   266  		Dir:            taskDir,
   267  		SharedAllocDir: filepath.Join(tc.AllocDir, allocdir.SharedAllocName),
   268  		LogDir:         filepath.Join(tc.AllocDir, allocdir.SharedAllocName, allocdir.LogDirName),
   269  		SharedTaskDir:  filepath.Join(taskDir, allocdir.SharedAllocName),
   270  		LocalDir:       filepath.Join(taskDir, allocdir.TaskLocal),
   271  		SecretsDir:     filepath.Join(taskDir, allocdir.TaskSecrets),
   272  	}
   273  }
   274  
   275  func (tc *TaskConfig) DecodeDriverConfig(t interface{}) error {
   276  	return base.MsgPackDecode(tc.rawDriverConfig, t)
   277  }
   278  
   279  func (tc *TaskConfig) EncodeDriverConfig(val cty.Value) error {
   280  	data, err := msgpack.Marshal(val, val.Type())
   281  	if err != nil {
   282  		return err
   283  	}
   284  
   285  	tc.rawDriverConfig = data
   286  	return nil
   287  }
   288  
   289  func (tc *TaskConfig) EncodeConcreteDriverConfig(t interface{}) error {
   290  	data := []byte{}
   291  	err := base.MsgPackEncode(&data, t)
   292  	if err != nil {
   293  		return err
   294  	}
   295  
   296  	tc.rawDriverConfig = data
   297  	return nil
   298  }
   299  
// Resources groups the resources assigned to a task: the Nomad-level
// AllocatedTaskResources and the Linux-specific LinuxResources. Either
// pointer may be nil; Copy preserves a nil field as nil.
type Resources struct {
	NomadResources *structs.AllocatedTaskResources
	LinuxResources *LinuxResources
}
   304  
   305  func (r *Resources) Copy() *Resources {
   306  	if r == nil {
   307  		return nil
   308  	}
   309  	res := new(Resources)
   310  	if r.NomadResources != nil {
   311  		res.NomadResources = r.NomadResources.Copy()
   312  	}
   313  	if r.LinuxResources != nil {
   314  		res.LinuxResources = r.LinuxResources.Copy()
   315  	}
   316  	return res
   317  }
   318  
   319  type LinuxResources struct {
   320  	CPUPeriod        int64
   321  	CPUQuota         int64
   322  	CPUShares        int64
   323  	MemoryLimitBytes int64
   324  	OOMScoreAdj      int64
   325  	CpusetCPUs       string
   326  	CpusetMems       string
   327  
   328  	// PrecentTicks is used to calculate the CPUQuota, currently the docker
   329  	// driver exposes cpu period and quota through the driver configuration
   330  	// and thus the calculation for CPUQuota cannot be done on the client.
   331  	// This is a capatability and should only be used by docker until the docker
   332  	// specific options are deprecated in favor of exposes CPUPeriod and
   333  	// CPUQuota at the task resource stanza.
   334  	PercentTicks float64
   335  }
   336  
   337  func (r *LinuxResources) Copy() *LinuxResources {
   338  	res := new(LinuxResources)
   339  	*res = *r
   340  	return res
   341  }
   342  
   343  type DeviceConfig struct {
   344  	TaskPath    string
   345  	HostPath    string
   346  	Permissions string
   347  }
   348  
   349  func (d *DeviceConfig) Copy() *DeviceConfig {
   350  	if d == nil {
   351  		return nil
   352  	}
   353  
   354  	dc := new(DeviceConfig)
   355  	*dc = *d
   356  	return dc
   357  }
   358  
   359  type MountConfig struct {
   360  	TaskPath        string
   361  	HostPath        string
   362  	Readonly        bool
   363  	PropagationMode string
   364  }
   365  
   366  func (m *MountConfig) IsEqual(o *MountConfig) bool {
   367  	return m.TaskPath == o.TaskPath &&
   368  		m.HostPath == o.HostPath &&
   369  		m.Readonly == o.Readonly &&
   370  		m.PropagationMode == o.PropagationMode
   371  }
   372  
   373  func (m *MountConfig) Copy() *MountConfig {
   374  	if m == nil {
   375  		return nil
   376  	}
   377  
   378  	mc := new(MountConfig)
   379  	*mc = *m
   380  	return mc
   381  }
   382  
// The task states defined by this package: "unknown", "running" and
// "exited".
const (
	TaskStateUnknown TaskState = "unknown"
	TaskStateRunning TaskState = "running"
	TaskStateExited  TaskState = "exited"
)

// TaskState is a string label for the state of a task; it is the type of
// TaskStatus.State.
type TaskState string
   390  
   391  type ExitResult struct {
   392  	ExitCode  int
   393  	Signal    int
   394  	OOMKilled bool
   395  	Err       error
   396  	TimedOut  bool
   397  }
   398  
   399  func (r *ExitResult) Successful() bool {
   400  	return r.ExitCode == 0 && r.Signal == 0 && r.Err == nil
   401  }
   402  
   403  func (r *ExitResult) Copy() *ExitResult {
   404  	if r == nil {
   405  		return nil
   406  	}
   407  	res := new(ExitResult)
   408  	*res = *r
   409  	return res
   410  }
   411  
// TaskStatus is the value returned by DriverPlugin.InspectTask. It carries
// the task's ID and name, its TaskState, start and completion timestamps, an
// optional ExitResult, a string map of driver attributes and an optional
// DriverNetwork in NetworkOverride.
type TaskStatus struct {
	ID               string
	Name             string
	State            TaskState
	StartedAt        time.Time
	CompletedAt      time.Time
	ExitResult       *ExitResult
	DriverAttributes map[string]string
	NetworkOverride  *DriverNetwork
}
   422  
// TaskEvent is the element type of the channel returned by
// DriverPlugin.TaskEvents. It identifies the task and allocation it belongs
// to and carries a timestamp, a message and a string map of annotations.
type TaskEvent struct {
	TaskID      string
	TaskName    string
	AllocID     string
	Timestamp   time.Time
	Message     string
	Annotations map[string]string

	// Err is only used if an error occurred while consuming the RPC stream
	Err error
}
   434  
// ExecTaskResult is the value returned by DriverPlugin.ExecTask: the bytes
// captured from stdout and stderr plus the command's ExitResult.
type ExecTaskResult struct {
	Stdout     []byte
	Stderr     []byte
	ExitResult *ExitResult
}
   440  
   441  // DriverNetwork is the network created by driver's (eg Docker's bridge
   442  // network) during Prestart.
   443  type DriverNetwork struct {
   444  	// PortMap can be set by drivers to replace ports in environment
   445  	// variables with driver-specific mappings.
   446  	PortMap map[string]int
   447  
   448  	// IP is the IP address for the task created by the driver.
   449  	IP string
   450  
   451  	// AutoAdvertise indicates whether the driver thinks services that
   452  	// choose to auto-advertise-addresses should use this IP instead of the
   453  	// host's. eg If a Docker network plugin is used
   454  	AutoAdvertise bool
   455  }
   456  
   457  // Advertise returns true if the driver suggests using the IP set. May be
   458  // called on a nil Network in which case it returns false.
   459  func (d *DriverNetwork) Advertise() bool {
   460  	return d != nil && d.AutoAdvertise
   461  }
   462  
   463  // Copy a DriverNetwork struct. If it is nil, nil is returned.
   464  func (d *DriverNetwork) Copy() *DriverNetwork {
   465  	if d == nil {
   466  		return nil
   467  	}
   468  	pm := make(map[string]int, len(d.PortMap))
   469  	for k, v := range d.PortMap {
   470  		pm[k] = v
   471  	}
   472  	return &DriverNetwork{
   473  		PortMap:       pm,
   474  		IP:            d.IP,
   475  		AutoAdvertise: d.AutoAdvertise,
   476  	}
   477  }
   478  
   479  // Hash the contents of a DriverNetwork struct to detect changes. If it is nil,
   480  // an empty slice is returned.
   481  func (d *DriverNetwork) Hash() []byte {
   482  	if d == nil {
   483  		return []byte{}
   484  	}
   485  	h := md5.New()
   486  	io.WriteString(h, d.IP)
   487  	io.WriteString(h, strconv.FormatBool(d.AutoAdvertise))
   488  	for k, v := range d.PortMap {
   489  		io.WriteString(h, k)
   490  		io.WriteString(h, strconv.Itoa(v))
   491  	}
   492  	return h.Sum(nil)
   493  }
   494  
// Helper types for operating on raw exec operations.
// We alias proto types as much as possible to avoid conversion overhead.
   497  
// ExecTaskStreamingRawDriver represents a low-level interface for executing a
// streaming exec call, and is intended to be used when a driver instance is
// to delegate exec handling to another backend, e.g. to an executor or a
// driver behind a grpc/rpc protocol.
//
// The Nomad client prefers this interface method over `ExecTaskStreaming` if
// the driver implements it.
type ExecTaskStreamingRawDriver interface {
	// ExecTaskStreamingRaw receives the task ID, the command, the tty flag
	// and an ExecTaskStream over which request and response messages are
	// exchanged. Only an error is returned.
	ExecTaskStreamingRaw(
		ctx context.Context,
		taskID string,
		command []string,
		tty bool,
		stream ExecTaskStream) error
}
   511  
// ExecTaskStream represents a stream of exec streaming messages,
// and is a handle to get stdin and tty size and send back
// stdout/stderr and exit operations.
//
// The methods are not concurrent safe; callers must ensure that methods are
// called from at most one goroutine.
type ExecTaskStream interface {
	// Send relays a response message back to the API.
	//
	// The call is synchronous and no reference to the message is held: once
	// the method call completes, the message reference can be reused or freed.
	Send(*ExecTaskStreamingResponseMsg) error

	// Recv receives exec streaming messages from the API. Returns `io.EOF`
	// on completion of the stream.
	Recv() (*ExecTaskStreamingRequestMsg, error)
}
   528  
// ExecTaskStreamingRequestMsg is an alias for proto.ExecTaskStreamingRequest,
// the message type returned by ExecTaskStream.Recv.
type ExecTaskStreamingRequestMsg = proto.ExecTaskStreamingRequest

// ExecTaskStreamingResponseMsg is an alias for
// proto.ExecTaskStreamingResponse, the message type accepted by
// ExecTaskStream.Send.
type ExecTaskStreamingResponseMsg = proto.ExecTaskStreamingResponse
   531  
// InternalCapabilitiesDriver is an experimental interface enabling a driver
// to disable some nomad functionality (e.g. logs or metrics).
//
// Intended for internal drivers only while the interface is stabilized.
type InternalCapabilitiesDriver interface {
	// InternalCapabilities returns the flags by value; no error is reported.
	InternalCapabilities() InternalCapabilities
}
   539  
// InternalCapabilities flags disabled functionality.
// Zero value means all is supported: each field is a "disable" switch, one
// for log collection and one for metrics collection.
type InternalCapabilities struct {
	DisableLogCollection     bool
	DisableMetricsCollection bool
}