github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/drivers/rawexec/driver.go (about) 1 package rawexec 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "runtime" 9 "strconv" 10 "syscall" 11 "time" 12 13 "github.com/hashicorp/consul-template/signals" 14 "github.com/hashicorp/go-hclog" 15 "github.com/hashicorp/nomad/drivers/shared/eventer" 16 "github.com/hashicorp/nomad/drivers/shared/executor" 17 "github.com/hashicorp/nomad/helper/pluginutils/loader" 18 "github.com/hashicorp/nomad/plugins/base" 19 "github.com/hashicorp/nomad/plugins/drivers" 20 "github.com/hashicorp/nomad/plugins/shared/hclspec" 21 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 22 ) 23 24 const ( 25 // pluginName is the name of the plugin 26 pluginName = "raw_exec" 27 28 // fingerprintPeriod is the interval at which the driver will send fingerprint responses 29 fingerprintPeriod = 30 * time.Second 30 31 // taskHandleVersion is the version of task handle which this driver sets 32 // and understands how to decode driver state 33 taskHandleVersion = 1 34 ) 35 36 var ( 37 // PluginID is the rawexec plugin metadata registered in the plugin 38 // catalog. 39 PluginID = loader.PluginID{ 40 Name: pluginName, 41 PluginType: base.PluginTypeDriver, 42 } 43 44 // PluginConfig is the rawexec factory function registered in the 45 // plugin catalog. 46 PluginConfig = &loader.InternalPluginConfig{ 47 Config: map[string]interface{}{}, 48 Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewRawExecDriver(ctx, l) }, 49 } 50 51 errDisabledDriver = fmt.Errorf("raw_exec is disabled") 52 ) 53 54 // PluginLoader maps pre-0.9 client driver options to post-0.9 plugin options. 55 func PluginLoader(opts map[string]string) (map[string]interface{}, error) { 56 conf := map[string]interface{}{} 57 if v, err := strconv.ParseBool(opts["driver.raw_exec.enable"]); err == nil { 58 conf["enabled"] = v 59 } 60 if v, err := strconv.ParseBool(opts["driver.raw_exec.no_cgroups"]); err == nil { 61 conf["no_cgroups"] = v 62 } 63 return conf, nil 64 } 65 66 var ( 67 // pluginInfo is the response returned for the PluginInfo RPC 68 pluginInfo = &base.PluginInfoResponse{ 69 Type: base.PluginTypeDriver, 70 PluginApiVersions: []string{drivers.ApiVersion010}, 71 PluginVersion: "0.1.0", 72 Name: pluginName, 73 } 74 75 // configSpec is the hcl specification returned by the ConfigSchema RPC 76 configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 77 "enabled": hclspec.NewDefault( 78 hclspec.NewAttr("enabled", "bool", false), 79 hclspec.NewLiteral("false"), 80 ), 81 "no_cgroups": hclspec.NewDefault( 82 hclspec.NewAttr("no_cgroups", "bool", false), 83 hclspec.NewLiteral("false"), 84 ), 85 }) 86 87 // taskConfigSpec is the hcl specification for the driver config section of 88 // a task within a job. It is returned in the TaskConfigSchema RPC 89 taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 90 "command": hclspec.NewAttr("command", "string", true), 91 "args": hclspec.NewAttr("args", "list(string)", false), 92 }) 93 94 // capabilities is returned by the Capabilities RPC and indicates what 95 // optional features this driver supports 96 capabilities = &drivers.Capabilities{ 97 SendSignals: true, 98 Exec: true, 99 FSIsolation: drivers.FSIsolationNone, 100 NetIsolationModes: []drivers.NetIsolationMode{ 101 drivers.NetIsolationModeHost, 102 drivers.NetIsolationModeGroup, 103 }, 104 MountConfigs: drivers.MountConfigSupportNone, 105 } 106 ) 107 108 // Driver is a privileged version of the exec driver. It provides no 109 // resource isolation and just fork/execs. The Exec driver should be preferred 110 // and this should only be used when explicitly needed. 111 type Driver struct { 112 // eventer is used to handle multiplexing of TaskEvents calls such that an 113 // event can be broadcast to all callers 114 eventer *eventer.Eventer 115 116 // config is the driver configuration set by the SetConfig RPC 117 config *Config 118 119 // nomadConfig is the client config from nomad 120 nomadConfig *base.ClientDriverConfig 121 122 // tasks is the in memory datastore mapping taskIDs to driverHandles 123 tasks *taskStore 124 125 // ctx is the context for the driver. It is passed to other subsystems to 126 // coordinate shutdown 127 ctx context.Context 128 129 // logger will log to the Nomad agent 130 logger hclog.Logger 131 } 132 133 // Config is the driver configuration set by the SetConfig RPC call 134 type Config struct { 135 // NoCgroups tracks whether we should use a cgroup to manage the process 136 // tree 137 NoCgroups bool `codec:"no_cgroups"` 138 139 // Enabled is set to true to enable the raw_exec driver 140 Enabled bool `codec:"enabled"` 141 } 142 143 // TaskConfig is the driver configuration of a task within a job 144 type TaskConfig struct { 145 Command string `codec:"command"` 146 Args []string `codec:"args"` 147 } 148 149 // TaskState is the state which is encoded in the handle returned in 150 // StartTask. This information is needed to rebuild the task state and handler 151 // during recovery. 152 type TaskState struct { 153 ReattachConfig *pstructs.ReattachConfig 154 TaskConfig *drivers.TaskConfig 155 Pid int 156 StartedAt time.Time 157 } 158 159 // NewRawExecDriver returns a new DriverPlugin implementation 160 func NewRawExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { 161 logger = logger.Named(pluginName) 162 return &Driver{ 163 eventer: eventer.NewEventer(ctx, logger), 164 config: &Config{}, 165 tasks: newTaskStore(), 166 ctx: ctx, 167 logger: logger, 168 } 169 } 170 171 func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { 172 return pluginInfo, nil 173 } 174 175 func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { 176 return configSpec, nil 177 } 178 179 func (d *Driver) SetConfig(cfg *base.Config) error { 180 var config Config 181 if len(cfg.PluginConfig) != 0 { 182 if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { 183 return err 184 } 185 } 186 187 d.config = &config 188 if cfg.AgentConfig != nil { 189 d.nomadConfig = cfg.AgentConfig.Driver 190 } 191 return nil 192 } 193 194 func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { 195 return taskConfigSpec, nil 196 } 197 198 func (d *Driver) Capabilities() (*drivers.Capabilities, error) { 199 return capabilities, nil 200 } 201 202 func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { 203 ch := make(chan *drivers.Fingerprint) 204 go d.handleFingerprint(ctx, ch) 205 return ch, nil 206 } 207 208 func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) { 209 defer close(ch) 210 ticker := time.NewTimer(0) 211 for { 212 select { 213 case <-ctx.Done(): 214 return 215 case <-d.ctx.Done(): 216 return 217 case <-ticker.C: 218 ticker.Reset(fingerprintPeriod) 219 ch <- d.buildFingerprint() 220 } 221 } 222 } 223 224 func (d *Driver) buildFingerprint() *drivers.Fingerprint { 225 var health drivers.HealthState 226 var desc string 227 attrs := map[string]*pstructs.Attribute{} 228 if d.config.Enabled { 229 health = drivers.HealthStateHealthy 230 desc = drivers.DriverHealthy 231 attrs["driver.raw_exec"] = pstructs.NewBoolAttribute(true) 232 } else { 233 health = drivers.HealthStateUndetected 234 desc = "disabled" 235 } 236 237 return &drivers.Fingerprint{ 238 Attributes: attrs, 239 Health: health, 240 HealthDescription: desc, 241 } 242 } 243 244 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 245 if handle == nil { 246 return fmt.Errorf("handle cannot be nil") 247 } 248 249 // If already attached to handle there's nothing to recover. 250 if _, ok := d.tasks.Get(handle.Config.ID); ok { 251 d.logger.Trace("nothing to recover; task already exists", 252 "task_id", handle.Config.ID, 253 "task_name", handle.Config.Name, 254 ) 255 return nil 256 } 257 258 // Handle doesn't already exist, try to reattach 259 var taskState TaskState 260 if err := handle.GetDriverState(&taskState); err != nil { 261 d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID) 262 return fmt.Errorf("failed to decode task state from handle: %v", err) 263 } 264 265 plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig) 266 if err != nil { 267 d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID) 268 return fmt.Errorf("failed to build ReattachConfig from task state: %v", err) 269 } 270 271 // Create client for reattached executor 272 exec, pluginClient, err := executor.ReattachToExecutor(plugRC, 273 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)) 274 if err != nil { 275 d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID) 276 return fmt.Errorf("failed to reattach to executor: %v", err) 277 } 278 279 h := &taskHandle{ 280 exec: exec, 281 pid: taskState.Pid, 282 pluginClient: pluginClient, 283 taskConfig: taskState.TaskConfig, 284 procState: drivers.TaskStateRunning, 285 startedAt: taskState.StartedAt, 286 exitResult: &drivers.ExitResult{}, 287 logger: d.logger, 288 doneCh: make(chan struct{}), 289 } 290 291 d.tasks.Set(taskState.TaskConfig.ID, h) 292 293 go h.run() 294 return nil 295 } 296 297 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 298 if !d.config.Enabled { 299 return nil, nil, errDisabledDriver 300 } 301 302 if _, ok := d.tasks.Get(cfg.ID); ok { 303 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 304 } 305 306 var driverConfig TaskConfig 307 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 308 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 309 } 310 311 d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig)) 312 handle := drivers.NewTaskHandle(taskHandleVersion) 313 handle.Config = cfg 314 315 pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out") 316 executorConfig := &executor.ExecutorConfig{ 317 LogFile: pluginLogFile, 318 LogLevel: "debug", 319 } 320 321 logger := d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID) 322 exec, pluginClient, err := executor.CreateExecutor(logger, d.nomadConfig, executorConfig) 323 if err != nil { 324 return nil, nil, fmt.Errorf("failed to create executor: %v", err) 325 } 326 327 // Only use cgroups when running as root on linux - Doing so in other cases 328 // will cause an error. 329 useCgroups := !d.config.NoCgroups && runtime.GOOS == "linux" && syscall.Geteuid() == 0 330 331 execCmd := &executor.ExecCommand{ 332 Cmd: driverConfig.Command, 333 Args: driverConfig.Args, 334 Env: cfg.EnvList(), 335 User: cfg.User, 336 BasicProcessCgroup: useCgroups, 337 TaskDir: cfg.TaskDir().Dir, 338 StdoutPath: cfg.StdoutPath, 339 StderrPath: cfg.StderrPath, 340 NetworkIsolation: cfg.NetworkIsolation, 341 } 342 343 ps, err := exec.Launch(execCmd) 344 if err != nil { 345 pluginClient.Kill() 346 return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err) 347 } 348 349 h := &taskHandle{ 350 exec: exec, 351 pid: ps.Pid, 352 pluginClient: pluginClient, 353 taskConfig: cfg, 354 procState: drivers.TaskStateRunning, 355 startedAt: time.Now().Round(time.Millisecond), 356 logger: d.logger, 357 doneCh: make(chan struct{}), 358 } 359 360 driverState := TaskState{ 361 ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()), 362 Pid: ps.Pid, 363 TaskConfig: cfg, 364 StartedAt: h.startedAt, 365 } 366 367 if err := handle.SetDriverState(&driverState); err != nil { 368 d.logger.Error("failed to start task, error setting driver state", "error", err) 369 _ = exec.Shutdown("", 0) 370 pluginClient.Kill() 371 return nil, nil, fmt.Errorf("failed to set driver state: %v", err) 372 } 373 374 d.tasks.Set(cfg.ID, h) 375 go h.run() 376 return handle, nil, nil 377 } 378 379 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 380 handle, ok := d.tasks.Get(taskID) 381 if !ok { 382 return nil, drivers.ErrTaskNotFound 383 } 384 385 ch := make(chan *drivers.ExitResult) 386 go d.handleWait(ctx, handle, ch) 387 388 return ch, nil 389 } 390 391 func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) { 392 defer close(ch) 393 var result *drivers.ExitResult 394 ps, err := handle.exec.Wait(ctx) 395 if err != nil { 396 result = &drivers.ExitResult{ 397 Err: fmt.Errorf("executor: error waiting on process: %v", err), 398 } 399 } else { 400 result = &drivers.ExitResult{ 401 ExitCode: ps.ExitCode, 402 Signal: ps.Signal, 403 } 404 } 405 406 select { 407 case <-ctx.Done(): 408 return 409 case <-d.ctx.Done(): 410 return 411 case ch <- result: 412 } 413 } 414 415 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 416 handle, ok := d.tasks.Get(taskID) 417 if !ok { 418 return drivers.ErrTaskNotFound 419 } 420 421 if err := handle.exec.Shutdown(signal, timeout); err != nil { 422 if handle.pluginClient.Exited() { 423 return nil 424 } 425 return fmt.Errorf("executor Shutdown failed: %v", err) 426 } 427 428 // Wait for handle to finish 429 <-handle.doneCh 430 431 // Kill executor 432 handle.pluginClient.Kill() 433 434 return nil 435 } 436 437 func (d *Driver) DestroyTask(taskID string, force bool) error { 438 handle, ok := d.tasks.Get(taskID) 439 if !ok { 440 return drivers.ErrTaskNotFound 441 } 442 443 if handle.IsRunning() && !force { 444 return fmt.Errorf("cannot destroy running task") 445 } 446 447 if !handle.pluginClient.Exited() { 448 if err := handle.exec.Shutdown("", 0); err != nil { 449 handle.logger.Error("destroying executor failed", "error", err) 450 } 451 452 handle.pluginClient.Kill() 453 } 454 455 d.tasks.Delete(taskID) 456 return nil 457 } 458 459 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 460 handle, ok := d.tasks.Get(taskID) 461 if !ok { 462 return nil, drivers.ErrTaskNotFound 463 } 464 465 return handle.TaskStatus(), nil 466 } 467 468 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 469 handle, ok := d.tasks.Get(taskID) 470 if !ok { 471 return nil, drivers.ErrTaskNotFound 472 } 473 474 return handle.exec.Stats(ctx, interval) 475 } 476 477 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 478 return d.eventer.TaskEvents(ctx) 479 } 480 481 func (d *Driver) SignalTask(taskID string, signal string) error { 482 handle, ok := d.tasks.Get(taskID) 483 if !ok { 484 return drivers.ErrTaskNotFound 485 } 486 487 sig := os.Interrupt 488 if s, ok := signals.SignalLookup[signal]; ok { 489 sig = s 490 } else { 491 d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID) 492 } 493 494 return handle.exec.Signal(sig) 495 } 496 497 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 498 if len(cmd) == 0 { 499 return nil, fmt.Errorf("error cmd must have at least one value") 500 } 501 handle, ok := d.tasks.Get(taskID) 502 if !ok { 503 return nil, drivers.ErrTaskNotFound 504 } 505 506 out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], cmd[1:]) 507 if err != nil { 508 return nil, err 509 } 510 511 return &drivers.ExecTaskResult{ 512 Stdout: out, 513 ExitResult: &drivers.ExitResult{ 514 ExitCode: exitCode, 515 }, 516 }, nil 517 } 518 519 var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil) 520 521 func (d *Driver) ExecTaskStreamingRaw(ctx context.Context, 522 taskID string, 523 command []string, 524 tty bool, 525 stream drivers.ExecTaskStream) error { 526 527 if len(command) == 0 { 528 return fmt.Errorf("error cmd must have at least one value") 529 } 530 handle, ok := d.tasks.Get(taskID) 531 if !ok { 532 return drivers.ErrTaskNotFound 533 } 534 535 return handle.exec.ExecStreaming(ctx, command, tty, stream) 536 }