github.com/manicqin/nomad@v0.9.5/plugins/drivers/driver.go (about) 1 package drivers 2 3 import ( 4 "context" 5 "crypto/md5" 6 "fmt" 7 "io" 8 "path/filepath" 9 "sort" 10 "strconv" 11 "time" 12 13 "github.com/hashicorp/nomad/client/allocdir" 14 cstructs "github.com/hashicorp/nomad/client/structs" 15 "github.com/hashicorp/nomad/helper" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/plugins/base" 18 "github.com/hashicorp/nomad/plugins/drivers/proto" 19 "github.com/hashicorp/nomad/plugins/shared/hclspec" 20 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 21 "github.com/zclconf/go-cty/cty" 22 "github.com/zclconf/go-cty/cty/msgpack" 23 ) 24 25 const ( 26 // DriverHealthy is the default health description that should be used 27 // if the driver is nominal 28 DriverHealthy = "Healthy" 29 30 // Pre09TaskHandleVersion is the version used to identify that the task 31 // handle is from a driver that existed before driver plugins (v0.9). The 32 // driver should take appropriate action to handle the old driver state. 33 Pre09TaskHandleVersion = 0 34 ) 35 36 // DriverPlugin is the interface with drivers will implement. It is also 37 // implemented by a plugin client which proxies the calls to go-plugin. See 38 // the proto/driver.proto file for detailed information about each RPC and 39 // message structure. 40 type DriverPlugin interface { 41 base.BasePlugin 42 43 TaskConfigSchema() (*hclspec.Spec, error) 44 Capabilities() (*Capabilities, error) 45 Fingerprint(context.Context) (<-chan *Fingerprint, error) 46 47 RecoverTask(*TaskHandle) error 48 StartTask(*TaskConfig) (*TaskHandle, *DriverNetwork, error) 49 WaitTask(ctx context.Context, taskID string) (<-chan *ExitResult, error) 50 StopTask(taskID string, timeout time.Duration, signal string) error 51 DestroyTask(taskID string, force bool) error 52 InspectTask(taskID string) (*TaskStatus, error) 53 TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) 54 TaskEvents(context.Context) (<-chan *TaskEvent, error) 55 56 SignalTask(taskID string, signal string) error 57 ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) 58 } 59 60 // ExecTaskStreamingDriver marks that a driver supports streaming exec task. This represents a user friendly 61 // interface to implement, as an alternative to the ExecTaskStreamingRawDriver, the low level interface. 62 type ExecTaskStreamingDriver interface { 63 ExecTaskStreaming(ctx context.Context, taskID string, execOptions *ExecOptions) (*ExitResult, error) 64 } 65 66 type ExecOptions struct { 67 // Command is command to run 68 Command []string 69 70 // Tty indicates whether pseudo-terminal is to be allocated 71 Tty bool 72 73 // streams 74 Stdin io.ReadCloser 75 Stdout io.WriteCloser 76 Stderr io.WriteCloser 77 78 // terminal size channel 79 ResizeCh <-chan TerminalSize 80 } 81 82 // DriverNetworkManager is the interface with exposes function for creating a 83 // network namespace for which tasks can join. This only needs to be implemented 84 // if the driver MUST create the network namespace 85 type DriverNetworkManager interface { 86 CreateNetwork(allocID string) (*NetworkIsolationSpec, bool, error) 87 DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error 88 } 89 90 // InternalDriverPlugin is an interface that exposes functions that are only 91 // implemented by internal driver plugins. 92 type InternalDriverPlugin interface { 93 // Shutdown allows the plugin to cleanup any running state to avoid leaking 94 // resources. It should not block. 95 Shutdown() 96 } 97 98 // DriverSignalTaskNotSupported can be embedded by drivers which don't support 99 // the SignalTask RPC. This satisfies the SignalTask func requirement for the 100 // DriverPlugin interface. 101 type DriverSignalTaskNotSupported struct{} 102 103 func (DriverSignalTaskNotSupported) SignalTask(taskID, signal string) error { 104 return fmt.Errorf("SignalTask is not supported by this driver") 105 } 106 107 // DriverExecTaskNotSupported can be embedded by drivers which don't support 108 // the ExecTask RPC. This satisfies the ExecTask func requirement of the 109 // DriverPlugin interface. 110 type DriverExecTaskNotSupported struct{} 111 112 func (_ DriverExecTaskNotSupported) ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) { 113 return nil, fmt.Errorf("ExecTask is not supported by this driver") 114 } 115 116 type HealthState string 117 118 var ( 119 HealthStateUndetected = HealthState("undetected") 120 HealthStateUnhealthy = HealthState("unhealthy") 121 HealthStateHealthy = HealthState("healthy") 122 ) 123 124 type Fingerprint struct { 125 Attributes map[string]*pstructs.Attribute 126 Health HealthState 127 HealthDescription string 128 129 // Err is set by the plugin if an error occurred during fingerprinting 130 Err error 131 } 132 133 // FSIsolation is an enumeration to describe what kind of filesystem isolation 134 // a driver supports. 135 type FSIsolation string 136 137 var ( 138 // FSIsolationNone means no isolation. The host filesystem is used. 139 FSIsolationNone = FSIsolation("none") 140 141 // FSIsolationChroot means the driver will use a chroot on the host 142 // filesystem. 143 FSIsolationChroot = FSIsolation("chroot") 144 145 // FSIsolationImage means the driver uses an image. 146 FSIsolationImage = FSIsolation("image") 147 ) 148 149 type Capabilities struct { 150 // SendSignals marks the driver as being able to send signals 151 SendSignals bool 152 153 // Exec marks the driver as being able to execute arbitrary commands 154 // such as health checks. Used by the ScriptExecutor interface. 155 Exec bool 156 157 //FSIsolation indicates what kind of filesystem isolation the driver supports. 158 FSIsolation FSIsolation 159 160 //NetIsolationModes lists the set of isolation modes supported by the driver 161 NetIsolationModes []NetIsolationMode 162 163 // MustInitiateNetwork tells Nomad that the driver must create the network 164 // namespace and that the CreateNetwork and DestroyNetwork RPCs are implemented. 165 MustInitiateNetwork bool 166 } 167 168 func (c *Capabilities) HasNetIsolationMode(m NetIsolationMode) bool { 169 for _, mode := range c.NetIsolationModes { 170 if mode == m { 171 return true 172 } 173 } 174 return false 175 } 176 177 type NetIsolationMode string 178 179 var ( 180 // NetIsolationModeHost disables network isolation and uses the host network 181 NetIsolationModeHost = NetIsolationMode("host") 182 183 // NetIsolationModeGroup uses the group network namespace for isolation 184 NetIsolationModeGroup = NetIsolationMode("group") 185 186 // NetIsolationModeTask isolates the network to just the task 187 NetIsolationModeTask = NetIsolationMode("task") 188 189 // NetIsolationModeNone indicates that there is no network to isolate and is 190 // inteded to be used for tasks that the client manages remotely 191 NetIsolationModeNone = NetIsolationMode("none") 192 ) 193 194 type NetworkIsolationSpec struct { 195 Mode NetIsolationMode 196 Path string 197 Labels map[string]string 198 } 199 200 type TerminalSize struct { 201 Height int 202 Width int 203 } 204 205 type TaskConfig struct { 206 ID string 207 JobName string 208 TaskGroupName string 209 Name string 210 Env map[string]string 211 DeviceEnv map[string]string 212 Resources *Resources 213 Devices []*DeviceConfig 214 Mounts []*MountConfig 215 User string 216 AllocDir string 217 rawDriverConfig []byte 218 StdoutPath string 219 StderrPath string 220 AllocID string 221 NetworkIsolation *NetworkIsolationSpec 222 } 223 224 func (tc *TaskConfig) Copy() *TaskConfig { 225 if tc == nil { 226 return nil 227 } 228 c := new(TaskConfig) 229 *c = *tc 230 c.Env = helper.CopyMapStringString(c.Env) 231 c.DeviceEnv = helper.CopyMapStringString(c.DeviceEnv) 232 c.Resources = tc.Resources.Copy() 233 234 if c.Devices != nil { 235 dc := make([]*DeviceConfig, len(c.Devices)) 236 for i, c := range c.Devices { 237 dc[i] = c.Copy() 238 } 239 c.Devices = dc 240 } 241 242 if c.Mounts != nil { 243 mc := make([]*MountConfig, len(c.Mounts)) 244 for i, m := range c.Mounts { 245 mc[i] = m.Copy() 246 } 247 c.Mounts = mc 248 } 249 250 return c 251 } 252 253 func (tc *TaskConfig) EnvList() []string { 254 l := make([]string, 0, len(tc.Env)) 255 for k, v := range tc.Env { 256 l = append(l, k+"="+v) 257 } 258 259 sort.Strings(l) 260 return l 261 } 262 263 func (tc *TaskConfig) TaskDir() *allocdir.TaskDir { 264 taskDir := filepath.Join(tc.AllocDir, tc.Name) 265 return &allocdir.TaskDir{ 266 Dir: taskDir, 267 SharedAllocDir: filepath.Join(tc.AllocDir, allocdir.SharedAllocName), 268 LogDir: filepath.Join(tc.AllocDir, allocdir.SharedAllocName, allocdir.LogDirName), 269 SharedTaskDir: filepath.Join(taskDir, allocdir.SharedAllocName), 270 LocalDir: filepath.Join(taskDir, allocdir.TaskLocal), 271 SecretsDir: filepath.Join(taskDir, allocdir.TaskSecrets), 272 } 273 } 274 275 func (tc *TaskConfig) DecodeDriverConfig(t interface{}) error { 276 return base.MsgPackDecode(tc.rawDriverConfig, t) 277 } 278 279 func (tc *TaskConfig) EncodeDriverConfig(val cty.Value) error { 280 data, err := msgpack.Marshal(val, val.Type()) 281 if err != nil { 282 return err 283 } 284 285 tc.rawDriverConfig = data 286 return nil 287 } 288 289 func (tc *TaskConfig) EncodeConcreteDriverConfig(t interface{}) error { 290 data := []byte{} 291 err := base.MsgPackEncode(&data, t) 292 if err != nil { 293 return err 294 } 295 296 tc.rawDriverConfig = data 297 return nil 298 } 299 300 type Resources struct { 301 NomadResources *structs.AllocatedTaskResources 302 LinuxResources *LinuxResources 303 } 304 305 func (r *Resources) Copy() *Resources { 306 if r == nil { 307 return nil 308 } 309 res := new(Resources) 310 if r.NomadResources != nil { 311 res.NomadResources = r.NomadResources.Copy() 312 } 313 if r.LinuxResources != nil { 314 res.LinuxResources = r.LinuxResources.Copy() 315 } 316 return res 317 } 318 319 type LinuxResources struct { 320 CPUPeriod int64 321 CPUQuota int64 322 CPUShares int64 323 MemoryLimitBytes int64 324 OOMScoreAdj int64 325 CpusetCPUs string 326 CpusetMems string 327 328 // PrecentTicks is used to calculate the CPUQuota, currently the docker 329 // driver exposes cpu period and quota through the driver configuration 330 // and thus the calculation for CPUQuota cannot be done on the client. 331 // This is a capatability and should only be used by docker until the docker 332 // specific options are deprecated in favor of exposes CPUPeriod and 333 // CPUQuota at the task resource stanza. 334 PercentTicks float64 335 } 336 337 func (r *LinuxResources) Copy() *LinuxResources { 338 res := new(LinuxResources) 339 *res = *r 340 return res 341 } 342 343 type DeviceConfig struct { 344 TaskPath string 345 HostPath string 346 Permissions string 347 } 348 349 func (d *DeviceConfig) Copy() *DeviceConfig { 350 if d == nil { 351 return nil 352 } 353 354 dc := new(DeviceConfig) 355 *dc = *d 356 return dc 357 } 358 359 type MountConfig struct { 360 TaskPath string 361 HostPath string 362 Readonly bool 363 PropagationMode string 364 } 365 366 func (m *MountConfig) IsEqual(o *MountConfig) bool { 367 return m.TaskPath == o.TaskPath && 368 m.HostPath == o.HostPath && 369 m.Readonly == o.Readonly && 370 m.PropagationMode == o.PropagationMode 371 } 372 373 func (m *MountConfig) Copy() *MountConfig { 374 if m == nil { 375 return nil 376 } 377 378 mc := new(MountConfig) 379 *mc = *m 380 return mc 381 } 382 383 const ( 384 TaskStateUnknown TaskState = "unknown" 385 TaskStateRunning TaskState = "running" 386 TaskStateExited TaskState = "exited" 387 ) 388 389 type TaskState string 390 391 type ExitResult struct { 392 ExitCode int 393 Signal int 394 OOMKilled bool 395 Err error 396 TimedOut bool 397 } 398 399 func (r *ExitResult) Successful() bool { 400 return r.ExitCode == 0 && r.Signal == 0 && r.Err == nil 401 } 402 403 func (r *ExitResult) Copy() *ExitResult { 404 if r == nil { 405 return nil 406 } 407 res := new(ExitResult) 408 *res = *r 409 return res 410 } 411 412 type TaskStatus struct { 413 ID string 414 Name string 415 State TaskState 416 StartedAt time.Time 417 CompletedAt time.Time 418 ExitResult *ExitResult 419 DriverAttributes map[string]string 420 NetworkOverride *DriverNetwork 421 } 422 423 type TaskEvent struct { 424 TaskID string 425 TaskName string 426 AllocID string 427 Timestamp time.Time 428 Message string 429 Annotations map[string]string 430 431 // Err is only used if an error occurred while consuming the RPC stream 432 Err error 433 } 434 435 type ExecTaskResult struct { 436 Stdout []byte 437 Stderr []byte 438 ExitResult *ExitResult 439 } 440 441 // DriverNetwork is the network created by driver's (eg Docker's bridge 442 // network) during Prestart. 443 type DriverNetwork struct { 444 // PortMap can be set by drivers to replace ports in environment 445 // variables with driver-specific mappings. 446 PortMap map[string]int 447 448 // IP is the IP address for the task created by the driver. 449 IP string 450 451 // AutoAdvertise indicates whether the driver thinks services that 452 // choose to auto-advertise-addresses should use this IP instead of the 453 // host's. eg If a Docker network plugin is used 454 AutoAdvertise bool 455 } 456 457 // Advertise returns true if the driver suggests using the IP set. May be 458 // called on a nil Network in which case it returns false. 459 func (d *DriverNetwork) Advertise() bool { 460 return d != nil && d.AutoAdvertise 461 } 462 463 // Copy a DriverNetwork struct. If it is nil, nil is returned. 464 func (d *DriverNetwork) Copy() *DriverNetwork { 465 if d == nil { 466 return nil 467 } 468 pm := make(map[string]int, len(d.PortMap)) 469 for k, v := range d.PortMap { 470 pm[k] = v 471 } 472 return &DriverNetwork{ 473 PortMap: pm, 474 IP: d.IP, 475 AutoAdvertise: d.AutoAdvertise, 476 } 477 } 478 479 // Hash the contents of a DriverNetwork struct to detect changes. If it is nil, 480 // an empty slice is returned. 481 func (d *DriverNetwork) Hash() []byte { 482 if d == nil { 483 return []byte{} 484 } 485 h := md5.New() 486 io.WriteString(h, d.IP) 487 io.WriteString(h, strconv.FormatBool(d.AutoAdvertise)) 488 for k, v := range d.PortMap { 489 io.WriteString(h, k) 490 io.WriteString(h, strconv.Itoa(v)) 491 } 492 return h.Sum(nil) 493 } 494 495 //// helper types for operating on raw exec operation 496 // we alias proto instances as much as possible to avoid conversion overhead 497 498 // ExecTaskStreamingRawDriver represents a low-level interface for executing a streaming exec 499 // call, and is intended to be used when driver instance is to delegate exec handling to another 500 // backend, e.g. to a executor or a driver behind a grpc/rpc protocol 501 // 502 // Nomad client would prefer this interface method over `ExecTaskStreaming` if driver implements it. 503 type ExecTaskStreamingRawDriver interface { 504 ExecTaskStreamingRaw( 505 ctx context.Context, 506 taskID string, 507 command []string, 508 tty bool, 509 stream ExecTaskStream) error 510 } 511 512 // ExecTaskStream represents a stream of exec streaming messages, 513 // and is a handle to get stdin and tty size and send back 514 // stdout/stderr and exit operations. 515 // 516 // The methods are not concurrent safe; callers must ensure that methods are called 517 // from at most one goroutine. 518 type ExecTaskStream interface { 519 // Send relays response message back to API. 520 // 521 // The call is synchronous and no references to message is held: once 522 // method call completes, the message reference can be reused or freed. 523 Send(*ExecTaskStreamingResponseMsg) error 524 525 // Receive exec streaming messages from API. Returns `io.EOF` on completion of stream. 526 Recv() (*ExecTaskStreamingRequestMsg, error) 527 } 528 529 type ExecTaskStreamingRequestMsg = proto.ExecTaskStreamingRequest 530 type ExecTaskStreamingResponseMsg = proto.ExecTaskStreamingResponse 531 532 // InternalCapabilitiesDriver is an experimental interface enabling a driver 533 // to disable some nomad functionality (e.g. logs or metrics). 534 // 535 // Intended for internal drivers only while the interface is stabalized. 536 type InternalCapabilitiesDriver interface { 537 InternalCapabilities() InternalCapabilities 538 } 539 540 // InternalCapabilities flags disabled functionality. 541 // Zero value means all is supported. 542 type InternalCapabilities struct { 543 DisableLogCollection bool 544 DisableMetricsCollection bool 545 }