github.com/bigcommerce/nomad@v0.9.3-bc/drivers/java/driver.go (about) 1 package java 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "os/exec" 8 "path/filepath" 9 "runtime" 10 "time" 11 12 "github.com/hashicorp/consul-template/signals" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/client/fingerprint" 15 "github.com/hashicorp/nomad/drivers/shared/eventer" 16 "github.com/hashicorp/nomad/drivers/shared/executor" 17 "github.com/hashicorp/nomad/helper/pluginutils/loader" 18 "github.com/hashicorp/nomad/plugins/base" 19 "github.com/hashicorp/nomad/plugins/drivers" 20 "github.com/hashicorp/nomad/plugins/drivers/utils" 21 "github.com/hashicorp/nomad/plugins/shared/hclspec" 22 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 23 ) 24 25 const ( 26 // pluginName is the name of the plugin 27 pluginName = "java" 28 29 // fingerprintPeriod is the interval at which the driver will send fingerprint responses 30 fingerprintPeriod = 30 * time.Second 31 32 // The key populated in Node Attributes to indicate presence of the Java driver 33 driverAttr = "driver.java" 34 driverVersionAttr = "driver.java.version" 35 36 // taskHandleVersion is the version of task handle which this driver sets 37 // and understands how to decode driver state 38 taskHandleVersion = 1 39 ) 40 41 var ( 42 // PluginID is the java plugin metadata registered in the plugin 43 // catalog. 44 PluginID = loader.PluginID{ 45 Name: pluginName, 46 PluginType: base.PluginTypeDriver, 47 } 48 49 // PluginConfig is the java driver factory function registered in the 50 // plugin catalog. 51 PluginConfig = &loader.InternalPluginConfig{ 52 Config: map[string]interface{}{}, 53 Factory: func(l hclog.Logger) interface{} { return NewDriver(l) }, 54 } 55 56 // pluginInfo is the response returned for the PluginInfo RPC 57 pluginInfo = &base.PluginInfoResponse{ 58 Type: base.PluginTypeDriver, 59 PluginApiVersions: []string{drivers.ApiVersion010}, 60 PluginVersion: "0.1.0", 61 Name: pluginName, 62 } 63 64 // configSpec is the hcl specification returned by the ConfigSchema RPC 65 configSpec = hclspec.NewObject(map[string]*hclspec.Spec{}) 66 67 // taskConfigSpec is the hcl specification for the driver config section of 68 // a taskConfig within a job. It is returned in the TaskConfigSchema RPC 69 taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 70 // It's required for either `class` or `jar_path` to be set, 71 // but that's not expressable in hclspec. Marking both as optional 72 // and setting checking explicitly later 73 "class": hclspec.NewAttr("class", "string", false), 74 "class_path": hclspec.NewAttr("class_path", "string", false), 75 "jar_path": hclspec.NewAttr("jar_path", "string", false), 76 "jvm_options": hclspec.NewAttr("jvm_options", "list(string)", false), 77 "args": hclspec.NewAttr("args", "list(string)", false), 78 }) 79 80 // capabilities is returned by the Capabilities RPC and indicates what 81 // optional features this driver supports 82 capabilities = &drivers.Capabilities{ 83 SendSignals: false, 84 Exec: false, 85 FSIsolation: drivers.FSIsolationNone, 86 } 87 88 _ drivers.DriverPlugin = (*Driver)(nil) 89 ) 90 91 func init() { 92 if runtime.GOOS == "linux" { 93 capabilities.FSIsolation = drivers.FSIsolationChroot 94 } 95 } 96 97 // TaskConfig is the driver configuration of a taskConfig within a job 98 type TaskConfig struct { 99 Class string `codec:"class"` 100 ClassPath string `codec:"class_path"` 101 JarPath string `codec:"jar_path"` 102 JvmOpts []string `codec:"jvm_options"` 103 Args []string `codec:"args"` // extra arguments to java executable 104 } 105 106 // TaskState is the state which is encoded in the handle returned in 107 // StartTask. This information is needed to rebuild the taskConfig state and handler 108 // during recovery. 109 type TaskState struct { 110 ReattachConfig *pstructs.ReattachConfig 111 TaskConfig *drivers.TaskConfig 112 Pid int 113 StartedAt time.Time 114 } 115 116 // Driver is a driver for running images via Java 117 type Driver struct { 118 // eventer is used to handle multiplexing of TaskEvents calls such that an 119 // event can be broadcast to all callers 120 eventer *eventer.Eventer 121 122 // tasks is the in memory datastore mapping taskIDs to taskHandle 123 tasks *taskStore 124 125 // ctx is the context for the driver. It is passed to other subsystems to 126 // coordinate shutdown 127 ctx context.Context 128 129 // nomadConf is the client agent's configuration 130 nomadConfig *base.ClientDriverConfig 131 132 // signalShutdown is called when the driver is shutting down and cancels the 133 // ctx passed to any subsystems 134 signalShutdown context.CancelFunc 135 136 // logger will log to the Nomad agent 137 logger hclog.Logger 138 } 139 140 func NewDriver(logger hclog.Logger) drivers.DriverPlugin { 141 ctx, cancel := context.WithCancel(context.Background()) 142 logger = logger.Named(pluginName) 143 return &Driver{ 144 eventer: eventer.NewEventer(ctx, logger), 145 tasks: newTaskStore(), 146 ctx: ctx, 147 signalShutdown: cancel, 148 logger: logger, 149 } 150 } 151 152 func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { 153 return pluginInfo, nil 154 } 155 156 func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { 157 return configSpec, nil 158 } 159 160 func (d *Driver) SetConfig(cfg *base.Config) error { 161 if cfg != nil && cfg.AgentConfig != nil { 162 d.nomadConfig = cfg.AgentConfig.Driver 163 } 164 return nil 165 } 166 167 func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { 168 return taskConfigSpec, nil 169 } 170 171 func (d *Driver) Capabilities() (*drivers.Capabilities, error) { 172 return capabilities, nil 173 } 174 175 func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { 176 ch := make(chan *drivers.Fingerprint) 177 go d.handleFingerprint(ctx, ch) 178 return ch, nil 179 } 180 181 func (d *Driver) handleFingerprint(ctx context.Context, ch chan *drivers.Fingerprint) { 182 ticker := time.NewTimer(0) 183 for { 184 select { 185 case <-ctx.Done(): 186 return 187 case <-d.ctx.Done(): 188 return 189 case <-ticker.C: 190 ticker.Reset(fingerprintPeriod) 191 ch <- d.buildFingerprint() 192 } 193 } 194 } 195 196 func (d *Driver) buildFingerprint() *drivers.Fingerprint { 197 fp := &drivers.Fingerprint{ 198 Attributes: map[string]*pstructs.Attribute{}, 199 Health: drivers.HealthStateHealthy, 200 HealthDescription: drivers.DriverHealthy, 201 } 202 203 if runtime.GOOS == "linux" { 204 // Only enable if w are root and cgroups are mounted when running on linux system 205 if !utils.IsUnixRoot() { 206 fp.Health = drivers.HealthStateUndetected 207 fp.HealthDescription = drivers.DriverRequiresRootMessage 208 return fp 209 } 210 211 mount, err := fingerprint.FindCgroupMountpointDir() 212 if err != nil { 213 fp.Health = drivers.HealthStateUnhealthy 214 fp.HealthDescription = drivers.NoCgroupMountMessage 215 d.logger.Warn(fp.HealthDescription, "error", err) 216 return fp 217 } 218 219 if mount == "" { 220 fp.Health = drivers.HealthStateUnhealthy 221 fp.HealthDescription = drivers.CgroupMountEmpty 222 return fp 223 } 224 } 225 226 version, runtime, vm, err := javaVersionInfo() 227 if err != nil { 228 // return no error, as it isn't an error to not find java, it just means we 229 // can't use it. 230 fp.Health = drivers.HealthStateUndetected 231 fp.HealthDescription = "" 232 return fp 233 } 234 235 fp.Attributes[driverAttr] = pstructs.NewBoolAttribute(true) 236 fp.Attributes[driverVersionAttr] = pstructs.NewStringAttribute(version) 237 fp.Attributes["driver.java.runtime"] = pstructs.NewStringAttribute(runtime) 238 fp.Attributes["driver.java.vm"] = pstructs.NewStringAttribute(vm) 239 240 return fp 241 } 242 243 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 244 if handle == nil { 245 return fmt.Errorf("handle cannot be nil") 246 } 247 248 // COMPAT(0.10): pre 0.9 upgrade path check 249 if handle.Version == 0 { 250 return d.recoverPre09Task(handle) 251 } 252 253 // If already attached to handle there's nothing to recover. 254 if _, ok := d.tasks.Get(handle.Config.ID); ok { 255 d.logger.Debug("nothing to recover; task already exists", 256 "task_id", handle.Config.ID, 257 "task_name", handle.Config.Name, 258 ) 259 return nil 260 } 261 262 var taskState TaskState 263 if err := handle.GetDriverState(&taskState); err != nil { 264 d.logger.Error("failed to decode taskConfig state from handle", "error", err, "task_id", handle.Config.ID) 265 return fmt.Errorf("failed to decode taskConfig state from handle: %v", err) 266 } 267 268 plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig) 269 if err != nil { 270 d.logger.Error("failed to build ReattachConfig from taskConfig state", "error", err, "task_id", handle.Config.ID) 271 return fmt.Errorf("failed to build ReattachConfig from taskConfig state: %v", err) 272 } 273 274 execImpl, pluginClient, err := executor.ReattachToExecutor(plugRC, 275 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)) 276 if err != nil { 277 d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID) 278 return fmt.Errorf("failed to reattach to executor: %v", err) 279 } 280 281 h := &taskHandle{ 282 exec: execImpl, 283 pid: taskState.Pid, 284 pluginClient: pluginClient, 285 taskConfig: taskState.TaskConfig, 286 procState: drivers.TaskStateRunning, 287 startedAt: taskState.StartedAt, 288 exitResult: &drivers.ExitResult{}, 289 } 290 291 d.tasks.Set(taskState.TaskConfig.ID, h) 292 293 go h.run() 294 return nil 295 } 296 297 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 298 if _, ok := d.tasks.Get(cfg.ID); ok { 299 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 300 } 301 302 var driverConfig TaskConfig 303 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 304 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 305 } 306 307 if driverConfig.Class == "" && driverConfig.JarPath == "" { 308 return nil, nil, fmt.Errorf("jar_path or class must be specified") 309 } 310 311 absPath, err := GetAbsolutePath("java") 312 if err != nil { 313 return nil, nil, fmt.Errorf("failed to find java binary: %s", err) 314 } 315 316 args := javaCmdArgs(driverConfig) 317 318 d.logger.Info("starting java task", "driver_cfg", hclog.Fmt("%+v", driverConfig), "args", args) 319 320 handle := drivers.NewTaskHandle(taskHandleVersion) 321 handle.Config = cfg 322 323 pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out") 324 executorConfig := &executor.ExecutorConfig{ 325 LogFile: pluginLogFile, 326 LogLevel: "debug", 327 FSIsolation: capabilities.FSIsolation == drivers.FSIsolationChroot, 328 } 329 330 exec, pluginClient, err := executor.CreateExecutor( 331 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID), 332 d.nomadConfig, executorConfig) 333 if err != nil { 334 return nil, nil, fmt.Errorf("failed to create executor: %v", err) 335 } 336 337 user := cfg.User 338 if user == "" { 339 user = "nobody" 340 } 341 342 execCmd := &executor.ExecCommand{ 343 Cmd: absPath, 344 Args: args, 345 Env: cfg.EnvList(), 346 User: user, 347 ResourceLimits: true, 348 Resources: cfg.Resources, 349 TaskDir: cfg.TaskDir().Dir, 350 StdoutPath: cfg.StdoutPath, 351 StderrPath: cfg.StderrPath, 352 Mounts: cfg.Mounts, 353 Devices: cfg.Devices, 354 } 355 356 ps, err := exec.Launch(execCmd) 357 if err != nil { 358 pluginClient.Kill() 359 return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err) 360 } 361 362 h := &taskHandle{ 363 exec: exec, 364 pid: ps.Pid, 365 pluginClient: pluginClient, 366 taskConfig: cfg, 367 procState: drivers.TaskStateRunning, 368 startedAt: time.Now().Round(time.Millisecond), 369 logger: d.logger, 370 } 371 372 driverState := TaskState{ 373 ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()), 374 Pid: ps.Pid, 375 TaskConfig: cfg, 376 StartedAt: h.startedAt, 377 } 378 379 if err := handle.SetDriverState(&driverState); err != nil { 380 d.logger.Error("failed to start task, error setting driver state", "error", err) 381 exec.Shutdown("", 0) 382 pluginClient.Kill() 383 return nil, nil, fmt.Errorf("failed to set driver state: %v", err) 384 } 385 386 d.tasks.Set(cfg.ID, h) 387 go h.run() 388 return handle, nil, nil 389 } 390 391 func javaCmdArgs(driverConfig TaskConfig) []string { 392 args := []string{} 393 // Look for jvm options 394 if len(driverConfig.JvmOpts) != 0 { 395 args = append(args, driverConfig.JvmOpts...) 396 } 397 398 // Add the classpath 399 if driverConfig.ClassPath != "" { 400 args = append(args, "-cp", driverConfig.ClassPath) 401 } 402 403 // Add the jar 404 if driverConfig.JarPath != "" { 405 args = append(args, "-jar", driverConfig.JarPath) 406 } 407 408 // Add the class 409 if driverConfig.Class != "" { 410 args = append(args, driverConfig.Class) 411 } 412 413 // Add any args 414 if len(driverConfig.Args) != 0 { 415 args = append(args, driverConfig.Args...) 416 } 417 418 return args 419 } 420 421 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 422 handle, ok := d.tasks.Get(taskID) 423 if !ok { 424 return nil, drivers.ErrTaskNotFound 425 } 426 427 ch := make(chan *drivers.ExitResult) 428 go d.handleWait(ctx, handle, ch) 429 430 return ch, nil 431 } 432 433 func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) { 434 defer close(ch) 435 var result *drivers.ExitResult 436 ps, err := handle.exec.Wait(ctx) 437 if err != nil { 438 result = &drivers.ExitResult{ 439 Err: fmt.Errorf("executor: error waiting on process: %v", err), 440 } 441 } else { 442 result = &drivers.ExitResult{ 443 ExitCode: ps.ExitCode, 444 Signal: ps.Signal, 445 } 446 } 447 448 select { 449 case <-ctx.Done(): 450 return 451 case <-d.ctx.Done(): 452 return 453 case ch <- result: 454 } 455 } 456 457 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 458 handle, ok := d.tasks.Get(taskID) 459 if !ok { 460 return drivers.ErrTaskNotFound 461 } 462 463 if err := handle.exec.Shutdown(signal, timeout); err != nil { 464 if handle.pluginClient.Exited() { 465 return nil 466 } 467 return fmt.Errorf("executor Shutdown failed: %v", err) 468 } 469 470 return nil 471 } 472 473 func (d *Driver) DestroyTask(taskID string, force bool) error { 474 handle, ok := d.tasks.Get(taskID) 475 if !ok { 476 return drivers.ErrTaskNotFound 477 } 478 479 if handle.IsRunning() && !force { 480 return fmt.Errorf("cannot destroy running task") 481 } 482 483 if !handle.pluginClient.Exited() { 484 if handle.IsRunning() { 485 if err := handle.exec.Shutdown("", 0); err != nil { 486 handle.logger.Error("destroying executor failed", "err", err) 487 } 488 } 489 490 handle.pluginClient.Kill() 491 } 492 493 d.tasks.Delete(taskID) 494 return nil 495 } 496 497 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 498 handle, ok := d.tasks.Get(taskID) 499 if !ok { 500 return nil, drivers.ErrTaskNotFound 501 } 502 503 return handle.TaskStatus(), nil 504 } 505 506 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 507 handle, ok := d.tasks.Get(taskID) 508 if !ok { 509 return nil, drivers.ErrTaskNotFound 510 } 511 512 return handle.exec.Stats(ctx, interval) 513 } 514 515 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 516 return d.eventer.TaskEvents(ctx) 517 } 518 519 func (d *Driver) SignalTask(taskID string, signal string) error { 520 handle, ok := d.tasks.Get(taskID) 521 if !ok { 522 return drivers.ErrTaskNotFound 523 } 524 525 sig := os.Interrupt 526 if s, ok := signals.SignalLookup[signal]; ok { 527 sig = s 528 } else { 529 d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID) 530 531 } 532 return handle.exec.Signal(sig) 533 } 534 535 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 536 if len(cmd) == 0 { 537 return nil, fmt.Errorf("error cmd must have at least one value") 538 } 539 handle, ok := d.tasks.Get(taskID) 540 if !ok { 541 return nil, drivers.ErrTaskNotFound 542 } 543 544 out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], cmd[1:]) 545 if err != nil { 546 return nil, err 547 } 548 549 return &drivers.ExecTaskResult{ 550 Stdout: out, 551 ExitResult: &drivers.ExitResult{ 552 ExitCode: exitCode, 553 }, 554 }, nil 555 } 556 557 var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil) 558 559 func (d *Driver) ExecTaskStreamingRaw(ctx context.Context, 560 taskID string, 561 command []string, 562 tty bool, 563 stream drivers.ExecTaskStream) error { 564 565 if len(command) == 0 { 566 return fmt.Errorf("error cmd must have at least one value") 567 } 568 handle, ok := d.tasks.Get(taskID) 569 if !ok { 570 return drivers.ErrTaskNotFound 571 } 572 573 return handle.exec.ExecStreaming(ctx, command, tty, stream) 574 } 575 576 // GetAbsolutePath returns the absolute path of the passed binary by resolving 577 // it in the path and following symlinks. 578 func GetAbsolutePath(bin string) (string, error) { 579 lp, err := exec.LookPath(bin) 580 if err != nil { 581 return "", fmt.Errorf("failed to resolve path to %q executable: %v", bin, err) 582 } 583 584 return filepath.EvalSymlinks(lp) 585 } 586 587 func (d *Driver) Shutdown() { 588 d.signalShutdown() 589 }