github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/drivers/exec/driver.go (about) 1 package exec 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "runtime" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/consul-template/signals" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/client/fingerprint" 15 "github.com/hashicorp/nomad/drivers/shared/eventer" 16 "github.com/hashicorp/nomad/drivers/shared/executor" 17 "github.com/hashicorp/nomad/helper" 18 "github.com/hashicorp/nomad/helper/pluginutils/loader" 19 "github.com/hashicorp/nomad/plugins/base" 20 "github.com/hashicorp/nomad/plugins/drivers" 21 "github.com/hashicorp/nomad/plugins/drivers/utils" 22 "github.com/hashicorp/nomad/plugins/shared/hclspec" 23 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 24 ) 25 26 const ( 27 // pluginName is the name of the plugin 28 pluginName = "exec" 29 30 // fingerprintPeriod is the interval at which the driver will send fingerprint responses 31 fingerprintPeriod = 30 * time.Second 32 33 // taskHandleVersion is the version of task handle which this driver sets 34 // and understands how to decode driver state 35 taskHandleVersion = 1 36 ) 37 38 var ( 39 // PluginID is the exec plugin metadata registered in the plugin 40 // catalog. 41 PluginID = loader.PluginID{ 42 Name: pluginName, 43 PluginType: base.PluginTypeDriver, 44 } 45 46 // PluginConfig is the exec driver factory function registered in the 47 // plugin catalog. 48 PluginConfig = &loader.InternalPluginConfig{ 49 Config: map[string]interface{}{}, 50 Factory: func(l hclog.Logger) interface{} { return NewExecDriver(l) }, 51 } 52 53 // pluginInfo is the response returned for the PluginInfo RPC 54 pluginInfo = &base.PluginInfoResponse{ 55 Type: base.PluginTypeDriver, 56 PluginApiVersions: []string{drivers.ApiVersion010}, 57 PluginVersion: "0.1.0", 58 Name: pluginName, 59 } 60 61 // configSpec is the hcl specification returned by the ConfigSchema RPC 62 configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 63 "no_pivot_root": hclspec.NewDefault( 64 hclspec.NewAttr("no_pivot_root", "bool", false), 65 hclspec.NewLiteral("false"), 66 ), 67 }) 68 69 // taskConfigSpec is the hcl specification for the driver config section of 70 // a task within a job. It is returned in the TaskConfigSchema RPC 71 taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 72 "command": hclspec.NewAttr("command", "string", true), 73 "args": hclspec.NewAttr("args", "list(string)", false), 74 }) 75 76 // capabilities is returned by the Capabilities RPC and indicates what 77 // optional features this driver supports 78 capabilities = &drivers.Capabilities{ 79 SendSignals: true, 80 Exec: true, 81 FSIsolation: drivers.FSIsolationChroot, 82 NetIsolationModes: []drivers.NetIsolationMode{ 83 drivers.NetIsolationModeHost, 84 drivers.NetIsolationModeGroup, 85 }, 86 } 87 ) 88 89 // Driver fork/execs tasks using many of the underlying OS's isolation 90 // features where configured. 91 type Driver struct { 92 // eventer is used to handle multiplexing of TaskEvents calls such that an 93 // event can be broadcast to all callers 94 eventer *eventer.Eventer 95 96 // config is the driver configuration set by the SetConfig RPC 97 config Config 98 99 // nomadConfig is the client config from nomad 100 nomadConfig *base.ClientDriverConfig 101 102 // tasks is the in memory datastore mapping taskIDs to driverHandles 103 tasks *taskStore 104 105 // ctx is the context for the driver. It is passed to other subsystems to 106 // coordinate shutdown 107 ctx context.Context 108 109 // signalShutdown is called when the driver is shutting down and cancels the 110 // ctx passed to any subsystems 111 signalShutdown context.CancelFunc 112 113 // logger will log to the Nomad agent 114 logger hclog.Logger 115 116 // A tri-state boolean to know if the fingerprinting has happened and 117 // whether it has been successful 118 fingerprintSuccess *bool 119 fingerprintLock sync.Mutex 120 } 121 122 // Config is the driver configuration set by the SetConfig RPC call 123 type Config struct { 124 // NoPivotRoot disables the use of pivot_root, useful when the root partition 125 // is on ramdisk 126 NoPivotRoot bool `codec:"no_pivot_root"` 127 } 128 129 // TaskConfig is the driver configuration of a task within a job 130 type TaskConfig struct { 131 Command string `codec:"command"` 132 Args []string `codec:"args"` 133 } 134 135 // TaskState is the state which is encoded in the handle returned in 136 // StartTask. This information is needed to rebuild the task state and handler 137 // during recovery. 138 type TaskState struct { 139 ReattachConfig *pstructs.ReattachConfig 140 TaskConfig *drivers.TaskConfig 141 Pid int 142 StartedAt time.Time 143 } 144 145 // NewExecDriver returns a new DrivePlugin implementation 146 func NewExecDriver(logger hclog.Logger) drivers.DriverPlugin { 147 ctx, cancel := context.WithCancel(context.Background()) 148 logger = logger.Named(pluginName) 149 return &Driver{ 150 eventer: eventer.NewEventer(ctx, logger), 151 tasks: newTaskStore(), 152 ctx: ctx, 153 signalShutdown: cancel, 154 logger: logger, 155 } 156 } 157 158 // setFingerprintSuccess marks the driver as having fingerprinted successfully 159 func (d *Driver) setFingerprintSuccess() { 160 d.fingerprintLock.Lock() 161 d.fingerprintSuccess = helper.BoolToPtr(true) 162 d.fingerprintLock.Unlock() 163 } 164 165 // setFingerprintFailure marks the driver as having failed fingerprinting 166 func (d *Driver) setFingerprintFailure() { 167 d.fingerprintLock.Lock() 168 d.fingerprintSuccess = helper.BoolToPtr(false) 169 d.fingerprintLock.Unlock() 170 } 171 172 // fingerprintSuccessful returns true if the driver has 173 // never fingerprinted or has successfully fingerprinted 174 func (d *Driver) fingerprintSuccessful() bool { 175 d.fingerprintLock.Lock() 176 defer d.fingerprintLock.Unlock() 177 return d.fingerprintSuccess == nil || *d.fingerprintSuccess 178 } 179 180 func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { 181 return pluginInfo, nil 182 } 183 184 func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { 185 return configSpec, nil 186 } 187 188 func (d *Driver) SetConfig(cfg *base.Config) error { 189 var config Config 190 if len(cfg.PluginConfig) != 0 { 191 if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { 192 return err 193 } 194 } 195 196 d.config = config 197 if cfg != nil && cfg.AgentConfig != nil { 198 d.nomadConfig = cfg.AgentConfig.Driver 199 } 200 return nil 201 } 202 203 func (d *Driver) Shutdown() { 204 d.signalShutdown() 205 } 206 207 func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { 208 return taskConfigSpec, nil 209 } 210 211 func (d *Driver) Capabilities() (*drivers.Capabilities, error) { 212 return capabilities, nil 213 } 214 215 func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { 216 ch := make(chan *drivers.Fingerprint) 217 go d.handleFingerprint(ctx, ch) 218 return ch, nil 219 220 } 221 func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) { 222 defer close(ch) 223 ticker := time.NewTimer(0) 224 for { 225 select { 226 case <-ctx.Done(): 227 return 228 case <-d.ctx.Done(): 229 return 230 case <-ticker.C: 231 ticker.Reset(fingerprintPeriod) 232 ch <- d.buildFingerprint() 233 } 234 } 235 } 236 237 func (d *Driver) buildFingerprint() *drivers.Fingerprint { 238 if runtime.GOOS != "linux" { 239 d.setFingerprintFailure() 240 return &drivers.Fingerprint{ 241 Health: drivers.HealthStateUndetected, 242 HealthDescription: "exec driver unsupported on client OS", 243 } 244 } 245 246 fp := &drivers.Fingerprint{ 247 Attributes: map[string]*pstructs.Attribute{}, 248 Health: drivers.HealthStateHealthy, 249 HealthDescription: drivers.DriverHealthy, 250 } 251 252 if !utils.IsUnixRoot() { 253 fp.Health = drivers.HealthStateUndetected 254 fp.HealthDescription = drivers.DriverRequiresRootMessage 255 d.setFingerprintFailure() 256 return fp 257 } 258 259 mount, err := fingerprint.FindCgroupMountpointDir() 260 if err != nil { 261 fp.Health = drivers.HealthStateUnhealthy 262 fp.HealthDescription = drivers.NoCgroupMountMessage 263 if d.fingerprintSuccessful() { 264 d.logger.Warn(fp.HealthDescription, "error", err) 265 } 266 d.setFingerprintFailure() 267 return fp 268 } 269 270 if mount == "" { 271 fp.Health = drivers.HealthStateUnhealthy 272 fp.HealthDescription = drivers.CgroupMountEmpty 273 d.setFingerprintFailure() 274 return fp 275 } 276 277 fp.Attributes["driver.exec"] = pstructs.NewBoolAttribute(true) 278 d.setFingerprintSuccess() 279 return fp 280 } 281 282 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 283 if handle == nil { 284 return fmt.Errorf("handle cannot be nil") 285 } 286 287 // COMPAT(0.10): pre 0.9 upgrade path check 288 if handle.Version == 0 { 289 return d.recoverPre09Task(handle) 290 } 291 292 // If already attached to handle there's nothing to recover. 293 if _, ok := d.tasks.Get(handle.Config.ID); ok { 294 d.logger.Trace("nothing to recover; task already exists", 295 "task_id", handle.Config.ID, 296 "task_name", handle.Config.Name, 297 ) 298 return nil 299 } 300 301 // Handle doesn't already exist, try to reattach 302 var taskState TaskState 303 if err := handle.GetDriverState(&taskState); err != nil { 304 d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID) 305 return fmt.Errorf("failed to decode task state from handle: %v", err) 306 } 307 308 // Create client for reattached executor 309 plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig) 310 if err != nil { 311 d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID) 312 return fmt.Errorf("failed to build ReattachConfig from task state: %v", err) 313 } 314 315 exec, pluginClient, err := executor.ReattachToExecutor(plugRC, 316 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)) 317 if err != nil { 318 d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID) 319 return fmt.Errorf("failed to reattach to executor: %v", err) 320 } 321 322 h := &taskHandle{ 323 exec: exec, 324 pid: taskState.Pid, 325 pluginClient: pluginClient, 326 taskConfig: taskState.TaskConfig, 327 procState: drivers.TaskStateRunning, 328 startedAt: taskState.StartedAt, 329 exitResult: &drivers.ExitResult{}, 330 logger: d.logger, 331 } 332 333 d.tasks.Set(taskState.TaskConfig.ID, h) 334 335 go h.run() 336 return nil 337 } 338 339 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 340 if _, ok := d.tasks.Get(cfg.ID); ok { 341 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 342 } 343 344 var driverConfig TaskConfig 345 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 346 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 347 } 348 349 d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig)) 350 handle := drivers.NewTaskHandle(taskHandleVersion) 351 handle.Config = cfg 352 353 pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out") 354 executorConfig := &executor.ExecutorConfig{ 355 LogFile: pluginLogFile, 356 LogLevel: "debug", 357 FSIsolation: true, 358 } 359 360 exec, pluginClient, err := executor.CreateExecutor( 361 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID), 362 d.nomadConfig, executorConfig) 363 if err != nil { 364 return nil, nil, fmt.Errorf("failed to create executor: %v", err) 365 } 366 367 user := cfg.User 368 if user == "" { 369 user = "nobody" 370 } 371 372 execCmd := &executor.ExecCommand{ 373 Cmd: driverConfig.Command, 374 Args: driverConfig.Args, 375 Env: cfg.EnvList(), 376 User: user, 377 ResourceLimits: true, 378 NoPivotRoot: d.config.NoPivotRoot, 379 Resources: cfg.Resources, 380 TaskDir: cfg.TaskDir().Dir, 381 StdoutPath: cfg.StdoutPath, 382 StderrPath: cfg.StderrPath, 383 Mounts: cfg.Mounts, 384 Devices: cfg.Devices, 385 NetworkIsolation: cfg.NetworkIsolation, 386 } 387 388 ps, err := exec.Launch(execCmd) 389 if err != nil { 390 pluginClient.Kill() 391 return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err) 392 } 393 394 h := &taskHandle{ 395 exec: exec, 396 pid: ps.Pid, 397 pluginClient: pluginClient, 398 taskConfig: cfg, 399 procState: drivers.TaskStateRunning, 400 startedAt: time.Now().Round(time.Millisecond), 401 logger: d.logger, 402 } 403 404 driverState := TaskState{ 405 ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()), 406 Pid: ps.Pid, 407 TaskConfig: cfg, 408 StartedAt: h.startedAt, 409 } 410 411 if err := handle.SetDriverState(&driverState); err != nil { 412 d.logger.Error("failed to start task, error setting driver state", "error", err) 413 exec.Shutdown("", 0) 414 pluginClient.Kill() 415 return nil, nil, fmt.Errorf("failed to set driver state: %v", err) 416 } 417 418 d.tasks.Set(cfg.ID, h) 419 go h.run() 420 return handle, nil, nil 421 } 422 423 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 424 handle, ok := d.tasks.Get(taskID) 425 if !ok { 426 return nil, drivers.ErrTaskNotFound 427 } 428 429 ch := make(chan *drivers.ExitResult) 430 go d.handleWait(ctx, handle, ch) 431 432 return ch, nil 433 } 434 435 func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) { 436 defer close(ch) 437 var result *drivers.ExitResult 438 ps, err := handle.exec.Wait(ctx) 439 if err != nil { 440 result = &drivers.ExitResult{ 441 Err: fmt.Errorf("executor: error waiting on process: %v", err), 442 } 443 } else { 444 result = &drivers.ExitResult{ 445 ExitCode: ps.ExitCode, 446 Signal: ps.Signal, 447 } 448 } 449 450 select { 451 case <-ctx.Done(): 452 return 453 case <-d.ctx.Done(): 454 return 455 case ch <- result: 456 } 457 } 458 459 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 460 handle, ok := d.tasks.Get(taskID) 461 if !ok { 462 return drivers.ErrTaskNotFound 463 } 464 465 if err := handle.exec.Shutdown(signal, timeout); err != nil { 466 if handle.pluginClient.Exited() { 467 return nil 468 } 469 return fmt.Errorf("executor Shutdown failed: %v", err) 470 } 471 472 return nil 473 } 474 475 func (d *Driver) DestroyTask(taskID string, force bool) error { 476 handle, ok := d.tasks.Get(taskID) 477 if !ok { 478 return drivers.ErrTaskNotFound 479 } 480 481 if handle.IsRunning() && !force { 482 return fmt.Errorf("cannot destroy running task") 483 } 484 485 if !handle.pluginClient.Exited() { 486 if err := handle.exec.Shutdown("", 0); err != nil { 487 handle.logger.Error("destroying executor failed", "err", err) 488 } 489 490 handle.pluginClient.Kill() 491 } 492 493 d.tasks.Delete(taskID) 494 return nil 495 } 496 497 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 498 handle, ok := d.tasks.Get(taskID) 499 if !ok { 500 return nil, drivers.ErrTaskNotFound 501 } 502 503 return handle.TaskStatus(), nil 504 } 505 506 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 507 handle, ok := d.tasks.Get(taskID) 508 if !ok { 509 return nil, drivers.ErrTaskNotFound 510 } 511 512 return handle.exec.Stats(ctx, interval) 513 } 514 515 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 516 return d.eventer.TaskEvents(ctx) 517 } 518 519 func (d *Driver) SignalTask(taskID string, signal string) error { 520 handle, ok := d.tasks.Get(taskID) 521 if !ok { 522 return drivers.ErrTaskNotFound 523 } 524 525 sig := os.Interrupt 526 if s, ok := signals.SignalLookup[signal]; ok { 527 sig = s 528 } else { 529 d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID) 530 531 } 532 return handle.exec.Signal(sig) 533 } 534 535 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 536 if len(cmd) == 0 { 537 return nil, fmt.Errorf("error cmd must have at least one value") 538 } 539 handle, ok := d.tasks.Get(taskID) 540 if !ok { 541 return nil, drivers.ErrTaskNotFound 542 } 543 544 args := []string{} 545 if len(cmd) > 1 { 546 args = cmd[1:] 547 } 548 549 out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args) 550 if err != nil { 551 return nil, err 552 } 553 554 return &drivers.ExecTaskResult{ 555 Stdout: out, 556 ExitResult: &drivers.ExitResult{ 557 ExitCode: exitCode, 558 }, 559 }, nil 560 } 561 562 var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil) 563 564 func (d *Driver) ExecTaskStreamingRaw(ctx context.Context, 565 taskID string, 566 command []string, 567 tty bool, 568 stream drivers.ExecTaskStream) error { 569 570 if len(command) == 0 { 571 return fmt.Errorf("error cmd must have at least one value") 572 } 573 handle, ok := d.tasks.Get(taskID) 574 if !ok { 575 return drivers.ErrTaskNotFound 576 } 577 578 return handle.exec.ExecStreaming(ctx, command, tty, stream) 579 }