github.com/janma/nomad@v0.11.3/drivers/exec/driver.go (about) 1 package exec 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "runtime" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/consul-template/signals" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/client/fingerprint" 15 "github.com/hashicorp/nomad/drivers/shared/eventer" 16 "github.com/hashicorp/nomad/drivers/shared/executor" 17 "github.com/hashicorp/nomad/helper" 18 "github.com/hashicorp/nomad/helper/pluginutils/loader" 19 "github.com/hashicorp/nomad/plugins/base" 20 "github.com/hashicorp/nomad/plugins/drivers" 21 "github.com/hashicorp/nomad/plugins/drivers/utils" 22 "github.com/hashicorp/nomad/plugins/shared/hclspec" 23 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 24 ) 25 26 const ( 27 // pluginName is the name of the plugin 28 pluginName = "exec" 29 30 // fingerprintPeriod is the interval at which the driver will send fingerprint responses 31 fingerprintPeriod = 30 * time.Second 32 33 // taskHandleVersion is the version of task handle which this driver sets 34 // and understands how to decode driver state 35 taskHandleVersion = 1 36 ) 37 38 var ( 39 // PluginID is the exec plugin metadata registered in the plugin 40 // catalog. 41 PluginID = loader.PluginID{ 42 Name: pluginName, 43 PluginType: base.PluginTypeDriver, 44 } 45 46 // PluginConfig is the exec driver factory function registered in the 47 // plugin catalog. 48 PluginConfig = &loader.InternalPluginConfig{ 49 Config: map[string]interface{}{}, 50 Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewExecDriver(ctx, l) }, 51 } 52 53 // pluginInfo is the response returned for the PluginInfo RPC 54 pluginInfo = &base.PluginInfoResponse{ 55 Type: base.PluginTypeDriver, 56 PluginApiVersions: []string{drivers.ApiVersion010}, 57 PluginVersion: "0.1.0", 58 Name: pluginName, 59 } 60 61 // configSpec is the hcl specification returned by the ConfigSchema RPC 62 configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 63 "no_pivot_root": hclspec.NewDefault( 64 hclspec.NewAttr("no_pivot_root", "bool", false), 65 hclspec.NewLiteral("false"), 66 ), 67 }) 68 69 // taskConfigSpec is the hcl specification for the driver config section of 70 // a task within a job. It is returned in the TaskConfigSchema RPC 71 taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 72 "command": hclspec.NewAttr("command", "string", true), 73 "args": hclspec.NewAttr("args", "list(string)", false), 74 }) 75 76 // capabilities is returned by the Capabilities RPC and indicates what 77 // optional features this driver supports 78 capabilities = &drivers.Capabilities{ 79 SendSignals: true, 80 Exec: true, 81 FSIsolation: drivers.FSIsolationChroot, 82 NetIsolationModes: []drivers.NetIsolationMode{ 83 drivers.NetIsolationModeHost, 84 drivers.NetIsolationModeGroup, 85 }, 86 MountConfigs: drivers.MountConfigSupportAll, 87 } 88 ) 89 90 // Driver fork/execs tasks using many of the underlying OS's isolation 91 // features where configured. 92 type Driver struct { 93 // eventer is used to handle multiplexing of TaskEvents calls such that an 94 // event can be broadcast to all callers 95 eventer *eventer.Eventer 96 97 // config is the driver configuration set by the SetConfig RPC 98 config Config 99 100 // nomadConfig is the client config from nomad 101 nomadConfig *base.ClientDriverConfig 102 103 // tasks is the in memory datastore mapping taskIDs to driverHandles 104 tasks *taskStore 105 106 // ctx is the context for the driver. It is passed to other subsystems to 107 // coordinate shutdown 108 ctx context.Context 109 110 // logger will log to the Nomad agent 111 logger hclog.Logger 112 113 // A tri-state boolean to know if the fingerprinting has happened and 114 // whether it has been successful 115 fingerprintSuccess *bool 116 fingerprintLock sync.Mutex 117 } 118 119 // Config is the driver configuration set by the SetConfig RPC call 120 type Config struct { 121 // NoPivotRoot disables the use of pivot_root, useful when the root partition 122 // is on ramdisk 123 NoPivotRoot bool `codec:"no_pivot_root"` 124 } 125 126 // TaskConfig is the driver configuration of a task within a job 127 type TaskConfig struct { 128 Command string `codec:"command"` 129 Args []string `codec:"args"` 130 } 131 132 // TaskState is the state which is encoded in the handle returned in 133 // StartTask. This information is needed to rebuild the task state and handler 134 // during recovery. 135 type TaskState struct { 136 ReattachConfig *pstructs.ReattachConfig 137 TaskConfig *drivers.TaskConfig 138 Pid int 139 StartedAt time.Time 140 } 141 142 // NewExecDriver returns a new DrivePlugin implementation 143 func NewExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { 144 logger = logger.Named(pluginName) 145 return &Driver{ 146 eventer: eventer.NewEventer(ctx, logger), 147 tasks: newTaskStore(), 148 ctx: ctx, 149 logger: logger, 150 } 151 } 152 153 // setFingerprintSuccess marks the driver as having fingerprinted successfully 154 func (d *Driver) setFingerprintSuccess() { 155 d.fingerprintLock.Lock() 156 d.fingerprintSuccess = helper.BoolToPtr(true) 157 d.fingerprintLock.Unlock() 158 } 159 160 // setFingerprintFailure marks the driver as having failed fingerprinting 161 func (d *Driver) setFingerprintFailure() { 162 d.fingerprintLock.Lock() 163 d.fingerprintSuccess = helper.BoolToPtr(false) 164 d.fingerprintLock.Unlock() 165 } 166 167 // fingerprintSuccessful returns true if the driver has 168 // never fingerprinted or has successfully fingerprinted 169 func (d *Driver) fingerprintSuccessful() bool { 170 d.fingerprintLock.Lock() 171 defer d.fingerprintLock.Unlock() 172 return d.fingerprintSuccess == nil || *d.fingerprintSuccess 173 } 174 175 func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { 176 return pluginInfo, nil 177 } 178 179 func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { 180 return configSpec, nil 181 } 182 183 func (d *Driver) SetConfig(cfg *base.Config) error { 184 var config Config 185 if len(cfg.PluginConfig) != 0 { 186 if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { 187 return err 188 } 189 } 190 191 d.config = config 192 if cfg != nil && cfg.AgentConfig != nil { 193 d.nomadConfig = cfg.AgentConfig.Driver 194 } 195 return nil 196 } 197 198 func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { 199 return taskConfigSpec, nil 200 } 201 202 func (d *Driver) Capabilities() (*drivers.Capabilities, error) { 203 return capabilities, nil 204 } 205 206 func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { 207 ch := make(chan *drivers.Fingerprint) 208 go d.handleFingerprint(ctx, ch) 209 return ch, nil 210 211 } 212 func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) { 213 defer close(ch) 214 ticker := time.NewTimer(0) 215 for { 216 select { 217 case <-ctx.Done(): 218 return 219 case <-d.ctx.Done(): 220 return 221 case <-ticker.C: 222 ticker.Reset(fingerprintPeriod) 223 ch <- d.buildFingerprint() 224 } 225 } 226 } 227 228 func (d *Driver) buildFingerprint() *drivers.Fingerprint { 229 if runtime.GOOS != "linux" { 230 d.setFingerprintFailure() 231 return &drivers.Fingerprint{ 232 Health: drivers.HealthStateUndetected, 233 HealthDescription: "exec driver unsupported on client OS", 234 } 235 } 236 237 fp := &drivers.Fingerprint{ 238 Attributes: map[string]*pstructs.Attribute{}, 239 Health: drivers.HealthStateHealthy, 240 HealthDescription: drivers.DriverHealthy, 241 } 242 243 if !utils.IsUnixRoot() { 244 fp.Health = drivers.HealthStateUndetected 245 fp.HealthDescription = drivers.DriverRequiresRootMessage 246 d.setFingerprintFailure() 247 return fp 248 } 249 250 mount, err := fingerprint.FindCgroupMountpointDir() 251 if err != nil { 252 fp.Health = drivers.HealthStateUnhealthy 253 fp.HealthDescription = drivers.NoCgroupMountMessage 254 if d.fingerprintSuccessful() { 255 d.logger.Warn(fp.HealthDescription, "error", err) 256 } 257 d.setFingerprintFailure() 258 return fp 259 } 260 261 if mount == "" { 262 fp.Health = drivers.HealthStateUnhealthy 263 fp.HealthDescription = drivers.CgroupMountEmpty 264 d.setFingerprintFailure() 265 return fp 266 } 267 268 fp.Attributes["driver.exec"] = pstructs.NewBoolAttribute(true) 269 d.setFingerprintSuccess() 270 return fp 271 } 272 273 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 274 if handle == nil { 275 return fmt.Errorf("handle cannot be nil") 276 } 277 278 // COMPAT(0.10): pre 0.9 upgrade path check 279 if handle.Version == 0 { 280 return d.recoverPre09Task(handle) 281 } 282 283 // If already attached to handle there's nothing to recover. 284 if _, ok := d.tasks.Get(handle.Config.ID); ok { 285 d.logger.Trace("nothing to recover; task already exists", 286 "task_id", handle.Config.ID, 287 "task_name", handle.Config.Name, 288 ) 289 return nil 290 } 291 292 // Handle doesn't already exist, try to reattach 293 var taskState TaskState 294 if err := handle.GetDriverState(&taskState); err != nil { 295 d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID) 296 return fmt.Errorf("failed to decode task state from handle: %v", err) 297 } 298 299 // Create client for reattached executor 300 plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig) 301 if err != nil { 302 d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID) 303 return fmt.Errorf("failed to build ReattachConfig from task state: %v", err) 304 } 305 306 exec, pluginClient, err := executor.ReattachToExecutor(plugRC, 307 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)) 308 if err != nil { 309 d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID) 310 return fmt.Errorf("failed to reattach to executor: %v", err) 311 } 312 313 h := &taskHandle{ 314 exec: exec, 315 pid: taskState.Pid, 316 pluginClient: pluginClient, 317 taskConfig: taskState.TaskConfig, 318 procState: drivers.TaskStateRunning, 319 startedAt: taskState.StartedAt, 320 exitResult: &drivers.ExitResult{}, 321 logger: d.logger, 322 } 323 324 d.tasks.Set(taskState.TaskConfig.ID, h) 325 326 go h.run() 327 return nil 328 } 329 330 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 331 if _, ok := d.tasks.Get(cfg.ID); ok { 332 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 333 } 334 335 var driverConfig TaskConfig 336 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 337 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 338 } 339 340 d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig)) 341 handle := drivers.NewTaskHandle(taskHandleVersion) 342 handle.Config = cfg 343 344 pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out") 345 executorConfig := &executor.ExecutorConfig{ 346 LogFile: pluginLogFile, 347 LogLevel: "debug", 348 FSIsolation: true, 349 } 350 351 exec, pluginClient, err := executor.CreateExecutor( 352 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID), 353 d.nomadConfig, executorConfig) 354 if err != nil { 355 return nil, nil, fmt.Errorf("failed to create executor: %v", err) 356 } 357 358 user := cfg.User 359 if user == "" { 360 user = "nobody" 361 } 362 363 execCmd := &executor.ExecCommand{ 364 Cmd: driverConfig.Command, 365 Args: driverConfig.Args, 366 Env: cfg.EnvList(), 367 User: user, 368 ResourceLimits: true, 369 NoPivotRoot: d.config.NoPivotRoot, 370 Resources: cfg.Resources, 371 TaskDir: cfg.TaskDir().Dir, 372 StdoutPath: cfg.StdoutPath, 373 StderrPath: cfg.StderrPath, 374 Mounts: cfg.Mounts, 375 Devices: cfg.Devices, 376 NetworkIsolation: cfg.NetworkIsolation, 377 } 378 379 ps, err := exec.Launch(execCmd) 380 if err != nil { 381 pluginClient.Kill() 382 return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err) 383 } 384 385 h := &taskHandle{ 386 exec: exec, 387 pid: ps.Pid, 388 pluginClient: pluginClient, 389 taskConfig: cfg, 390 procState: drivers.TaskStateRunning, 391 startedAt: time.Now().Round(time.Millisecond), 392 logger: d.logger, 393 } 394 395 driverState := TaskState{ 396 ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()), 397 Pid: ps.Pid, 398 TaskConfig: cfg, 399 StartedAt: h.startedAt, 400 } 401 402 if err := handle.SetDriverState(&driverState); err != nil { 403 d.logger.Error("failed to start task, error setting driver state", "error", err) 404 exec.Shutdown("", 0) 405 pluginClient.Kill() 406 return nil, nil, fmt.Errorf("failed to set driver state: %v", err) 407 } 408 409 d.tasks.Set(cfg.ID, h) 410 go h.run() 411 return handle, nil, nil 412 } 413 414 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 415 handle, ok := d.tasks.Get(taskID) 416 if !ok { 417 return nil, drivers.ErrTaskNotFound 418 } 419 420 ch := make(chan *drivers.ExitResult) 421 go d.handleWait(ctx, handle, ch) 422 423 return ch, nil 424 } 425 426 func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) { 427 defer close(ch) 428 var result *drivers.ExitResult 429 ps, err := handle.exec.Wait(ctx) 430 if err != nil { 431 result = &drivers.ExitResult{ 432 Err: fmt.Errorf("executor: error waiting on process: %v", err), 433 } 434 } else { 435 result = &drivers.ExitResult{ 436 ExitCode: ps.ExitCode, 437 Signal: ps.Signal, 438 } 439 } 440 441 select { 442 case <-ctx.Done(): 443 return 444 case <-d.ctx.Done(): 445 return 446 case ch <- result: 447 } 448 } 449 450 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 451 handle, ok := d.tasks.Get(taskID) 452 if !ok { 453 return drivers.ErrTaskNotFound 454 } 455 456 if err := handle.exec.Shutdown(signal, timeout); err != nil { 457 if handle.pluginClient.Exited() { 458 return nil 459 } 460 return fmt.Errorf("executor Shutdown failed: %v", err) 461 } 462 463 return nil 464 } 465 466 func (d *Driver) DestroyTask(taskID string, force bool) error { 467 handle, ok := d.tasks.Get(taskID) 468 if !ok { 469 return drivers.ErrTaskNotFound 470 } 471 472 if handle.IsRunning() && !force { 473 return fmt.Errorf("cannot destroy running task") 474 } 475 476 if !handle.pluginClient.Exited() { 477 if err := handle.exec.Shutdown("", 0); err != nil { 478 handle.logger.Error("destroying executor failed", "err", err) 479 } 480 481 handle.pluginClient.Kill() 482 } 483 484 d.tasks.Delete(taskID) 485 return nil 486 } 487 488 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 489 handle, ok := d.tasks.Get(taskID) 490 if !ok { 491 return nil, drivers.ErrTaskNotFound 492 } 493 494 return handle.TaskStatus(), nil 495 } 496 497 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 498 handle, ok := d.tasks.Get(taskID) 499 if !ok { 500 return nil, drivers.ErrTaskNotFound 501 } 502 503 return handle.exec.Stats(ctx, interval) 504 } 505 506 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 507 return d.eventer.TaskEvents(ctx) 508 } 509 510 func (d *Driver) SignalTask(taskID string, signal string) error { 511 handle, ok := d.tasks.Get(taskID) 512 if !ok { 513 return drivers.ErrTaskNotFound 514 } 515 516 sig := os.Interrupt 517 if s, ok := signals.SignalLookup[signal]; ok { 518 sig = s 519 } else { 520 d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID) 521 522 } 523 return handle.exec.Signal(sig) 524 } 525 526 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 527 if len(cmd) == 0 { 528 return nil, fmt.Errorf("error cmd must have at least one value") 529 } 530 handle, ok := d.tasks.Get(taskID) 531 if !ok { 532 return nil, drivers.ErrTaskNotFound 533 } 534 535 args := []string{} 536 if len(cmd) > 1 { 537 args = cmd[1:] 538 } 539 540 out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args) 541 if err != nil { 542 return nil, err 543 } 544 545 return &drivers.ExecTaskResult{ 546 Stdout: out, 547 ExitResult: &drivers.ExitResult{ 548 ExitCode: exitCode, 549 }, 550 }, nil 551 } 552 553 var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil) 554 555 func (d *Driver) ExecTaskStreamingRaw(ctx context.Context, 556 taskID string, 557 command []string, 558 tty bool, 559 stream drivers.ExecTaskStream) error { 560 561 if len(command) == 0 { 562 return fmt.Errorf("error cmd must have at least one value") 563 } 564 handle, ok := d.tasks.Get(taskID) 565 if !ok { 566 return drivers.ErrTaskNotFound 567 } 568 569 return handle.exec.ExecStreaming(ctx, command, tty, stream) 570 }