github.com/manicqin/nomad@v0.9.5/plugins/drivers/client.go (about) 1 package drivers 2 3 import ( 4 "context" 5 "errors" 6 "io" 7 "time" 8 9 "github.com/LK4D4/joincontext" 10 "github.com/golang/protobuf/ptypes" 11 hclog "github.com/hashicorp/go-hclog" 12 cstructs "github.com/hashicorp/nomad/client/structs" 13 "github.com/hashicorp/nomad/helper/pluginutils/grpcutils" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/plugins/base" 16 "github.com/hashicorp/nomad/plugins/drivers/proto" 17 "github.com/hashicorp/nomad/plugins/shared/hclspec" 18 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 19 sproto "github.com/hashicorp/nomad/plugins/shared/structs/proto" 20 "google.golang.org/grpc/status" 21 ) 22 23 var _ DriverPlugin = &driverPluginClient{} 24 25 type driverPluginClient struct { 26 *base.BasePluginClient 27 28 client proto.DriverClient 29 logger hclog.Logger 30 31 // doneCtx is closed when the plugin exits 32 doneCtx context.Context 33 } 34 35 func (d *driverPluginClient) TaskConfigSchema() (*hclspec.Spec, error) { 36 req := &proto.TaskConfigSchemaRequest{} 37 38 resp, err := d.client.TaskConfigSchema(d.doneCtx, req) 39 if err != nil { 40 return nil, grpcutils.HandleGrpcErr(err, d.doneCtx) 41 } 42 43 return resp.Spec, nil 44 } 45 46 func (d *driverPluginClient) Capabilities() (*Capabilities, error) { 47 req := &proto.CapabilitiesRequest{} 48 49 resp, err := d.client.Capabilities(d.doneCtx, req) 50 if err != nil { 51 return nil, grpcutils.HandleGrpcErr(err, d.doneCtx) 52 } 53 54 caps := &Capabilities{} 55 if resp.Capabilities != nil { 56 caps.SendSignals = resp.Capabilities.SendSignals 57 caps.Exec = resp.Capabilities.Exec 58 caps.MustInitiateNetwork = resp.Capabilities.MustCreateNetwork 59 60 for _, mode := range resp.Capabilities.NetworkIsolationModes { 61 caps.NetIsolationModes = append(caps.NetIsolationModes, netIsolationModeFromProto(mode)) 62 } 63 64 switch resp.Capabilities.FsIsolation { 65 case proto.DriverCapabilities_NONE: 66 caps.FSIsolation = FSIsolationNone 67 case proto.DriverCapabilities_CHROOT: 68 caps.FSIsolation = FSIsolationChroot 69 case proto.DriverCapabilities_IMAGE: 70 caps.FSIsolation = FSIsolationImage 71 default: 72 caps.FSIsolation = FSIsolationNone 73 } 74 } 75 76 return caps, nil 77 } 78 79 // Fingerprint the driver, return a chan that will be pushed to periodically and on changes to health 80 func (d *driverPluginClient) Fingerprint(ctx context.Context) (<-chan *Fingerprint, error) { 81 req := &proto.FingerprintRequest{} 82 83 // Join the passed context and the shutdown context 84 joinedCtx, _ := joincontext.Join(ctx, d.doneCtx) 85 86 stream, err := d.client.Fingerprint(joinedCtx, req) 87 if err != nil { 88 return nil, grpcutils.HandleReqCtxGrpcErr(err, ctx, d.doneCtx) 89 } 90 91 ch := make(chan *Fingerprint, 1) 92 go d.handleFingerprint(ctx, ch, stream) 93 94 return ch, nil 95 } 96 97 func (d *driverPluginClient) handleFingerprint(reqCtx context.Context, ch chan *Fingerprint, stream proto.Driver_FingerprintClient) { 98 defer close(ch) 99 for { 100 pb, err := stream.Recv() 101 if err != nil { 102 if err != io.EOF { 103 ch <- &Fingerprint{ 104 Err: grpcutils.HandleReqCtxGrpcErr(err, reqCtx, d.doneCtx), 105 } 106 } 107 108 // End the stream 109 return 110 } 111 112 f := &Fingerprint{ 113 Attributes: pstructs.ConvertProtoAttributeMap(pb.Attributes), 114 Health: healthStateFromProto(pb.Health), 115 HealthDescription: pb.HealthDescription, 116 } 117 118 select { 119 case <-reqCtx.Done(): 120 return 121 case ch <- f: 122 } 123 } 124 } 125 126 // RecoverTask does internal state recovery to be able to control the task of 127 // the given TaskHandle 128 func (d *driverPluginClient) RecoverTask(h *TaskHandle) error { 129 req := &proto.RecoverTaskRequest{Handle: taskHandleToProto(h)} 130 131 _, err := d.client.RecoverTask(d.doneCtx, req) 132 return grpcutils.HandleGrpcErr(err, d.doneCtx) 133 } 134 135 // StartTask starts execution of a task with the given TaskConfig. A TaskHandle 136 // is returned to the caller that can be used to recover state of the task, 137 // should the driver crash or exit prematurely. 138 func (d *driverPluginClient) StartTask(c *TaskConfig) (*TaskHandle, *DriverNetwork, error) { 139 req := &proto.StartTaskRequest{ 140 Task: taskConfigToProto(c), 141 } 142 143 resp, err := d.client.StartTask(d.doneCtx, req) 144 if err != nil { 145 st := status.Convert(err) 146 if len(st.Details()) > 0 { 147 if rec, ok := st.Details()[0].(*sproto.RecoverableError); ok { 148 return nil, nil, structs.NewRecoverableError(err, rec.Recoverable) 149 } 150 } 151 return nil, nil, grpcutils.HandleGrpcErr(err, d.doneCtx) 152 } 153 154 var net *DriverNetwork 155 if resp.NetworkOverride != nil { 156 net = &DriverNetwork{ 157 PortMap: map[string]int{}, 158 IP: resp.NetworkOverride.Addr, 159 AutoAdvertise: resp.NetworkOverride.AutoAdvertise, 160 } 161 for k, v := range resp.NetworkOverride.PortMap { 162 net.PortMap[k] = int(v) 163 } 164 } 165 166 return taskHandleFromProto(resp.Handle), net, nil 167 } 168 169 // WaitTask returns a channel that will have an ExitResult pushed to it once when the task 170 // exits on its own or is killed. If WaitTask is called after the task has exited, the channel 171 // will immedialy return the ExitResult. WaitTask can be called multiple times for 172 // the same task without issue. 173 func (d *driverPluginClient) WaitTask(ctx context.Context, id string) (<-chan *ExitResult, error) { 174 ch := make(chan *ExitResult) 175 go d.handleWaitTask(ctx, id, ch) 176 return ch, nil 177 } 178 179 func (d *driverPluginClient) handleWaitTask(ctx context.Context, id string, ch chan *ExitResult) { 180 defer close(ch) 181 var result ExitResult 182 req := &proto.WaitTaskRequest{ 183 TaskId: id, 184 } 185 186 // Join the passed context and the shutdown context 187 joinedCtx, _ := joincontext.Join(ctx, d.doneCtx) 188 189 resp, err := d.client.WaitTask(joinedCtx, req) 190 if err != nil { 191 result.Err = grpcutils.HandleReqCtxGrpcErr(err, ctx, d.doneCtx) 192 } else { 193 result.ExitCode = int(resp.Result.ExitCode) 194 result.Signal = int(resp.Result.Signal) 195 result.OOMKilled = resp.Result.OomKilled 196 if len(resp.Err) > 0 { 197 result.Err = errors.New(resp.Err) 198 } 199 } 200 ch <- &result 201 } 202 203 // StopTask stops the task with the given taskID. A timeout and signal can be 204 // given to control a graceful termination of the task. The driver will send the 205 // given signal to the task and wait for the given timeout for it to exit. If the 206 // task does not exit within the timeout it will be forcefully killed. 207 func (d *driverPluginClient) StopTask(taskID string, timeout time.Duration, signal string) error { 208 req := &proto.StopTaskRequest{ 209 TaskId: taskID, 210 Timeout: ptypes.DurationProto(timeout), 211 Signal: signal, 212 } 213 214 _, err := d.client.StopTask(d.doneCtx, req) 215 return grpcutils.HandleGrpcErr(err, d.doneCtx) 216 } 217 218 // DestroyTask removes the task from the driver's in memory state. The task 219 // cannot be running unless force is set to true. If force is set to true the 220 // driver will forcefully terminate the task before removing it. 221 func (d *driverPluginClient) DestroyTask(taskID string, force bool) error { 222 req := &proto.DestroyTaskRequest{ 223 TaskId: taskID, 224 Force: force, 225 } 226 227 _, err := d.client.DestroyTask(d.doneCtx, req) 228 return grpcutils.HandleGrpcErr(err, d.doneCtx) 229 } 230 231 // InspectTask returns status information for a task 232 func (d *driverPluginClient) InspectTask(taskID string) (*TaskStatus, error) { 233 req := &proto.InspectTaskRequest{TaskId: taskID} 234 235 resp, err := d.client.InspectTask(d.doneCtx, req) 236 if err != nil { 237 return nil, grpcutils.HandleGrpcErr(err, d.doneCtx) 238 } 239 240 status, err := taskStatusFromProto(resp.Task) 241 if err != nil { 242 return nil, err 243 } 244 245 if resp.Driver != nil { 246 status.DriverAttributes = resp.Driver.Attributes 247 } 248 if resp.NetworkOverride != nil { 249 status.NetworkOverride = &DriverNetwork{ 250 PortMap: map[string]int{}, 251 IP: resp.NetworkOverride.Addr, 252 AutoAdvertise: resp.NetworkOverride.AutoAdvertise, 253 } 254 for k, v := range resp.NetworkOverride.PortMap { 255 status.NetworkOverride.PortMap[k] = int(v) 256 } 257 } 258 259 return status, nil 260 } 261 262 // TaskStats returns resource usage statistics for the task 263 func (d *driverPluginClient) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 264 req := &proto.TaskStatsRequest{ 265 TaskId: taskID, 266 CollectionInterval: ptypes.DurationProto(interval), 267 } 268 ctx, _ = joincontext.Join(ctx, d.doneCtx) 269 stream, err := d.client.TaskStats(ctx, req) 270 if err != nil { 271 st := status.Convert(err) 272 if len(st.Details()) > 0 { 273 if rec, ok := st.Details()[0].(*sproto.RecoverableError); ok { 274 return nil, structs.NewRecoverableError(err, rec.Recoverable) 275 } 276 } 277 return nil, grpcutils.HandleGrpcErr(err, d.doneCtx) 278 } 279 280 ch := make(chan *cstructs.TaskResourceUsage, 1) 281 go d.handleStats(ctx, ch, stream) 282 283 return ch, nil 284 } 285 286 func (d *driverPluginClient) handleStats(ctx context.Context, ch chan<- *cstructs.TaskResourceUsage, stream proto.Driver_TaskStatsClient) { 287 defer close(ch) 288 for { 289 resp, err := stream.Recv() 290 if ctx.Err() != nil { 291 // Context canceled; exit gracefully 292 return 293 } 294 295 if err != nil { 296 if err != io.EOF { 297 d.logger.Error("error receiving stream from TaskStats driver RPC, closing stream", "error", err) 298 } 299 300 // End of stream 301 return 302 } 303 304 stats, err := TaskStatsFromProto(resp.Stats) 305 if err != nil { 306 d.logger.Error("failed to decode stats from RPC", "error", err, "stats", resp.Stats) 307 continue 308 } 309 310 select { 311 case ch <- stats: 312 case <-ctx.Done(): 313 } 314 } 315 } 316 317 // TaskEvents returns a channel that will receive events from the driver about all 318 // tasks such as lifecycle events, terminal errors, etc. 319 func (d *driverPluginClient) TaskEvents(ctx context.Context) (<-chan *TaskEvent, error) { 320 req := &proto.TaskEventsRequest{} 321 322 // Join the passed context and the shutdown context 323 joinedCtx, _ := joincontext.Join(ctx, d.doneCtx) 324 325 stream, err := d.client.TaskEvents(joinedCtx, req) 326 if err != nil { 327 return nil, grpcutils.HandleReqCtxGrpcErr(err, ctx, d.doneCtx) 328 } 329 330 ch := make(chan *TaskEvent, 1) 331 go d.handleTaskEvents(ctx, ch, stream) 332 return ch, nil 333 } 334 335 func (d *driverPluginClient) handleTaskEvents(reqCtx context.Context, ch chan *TaskEvent, stream proto.Driver_TaskEventsClient) { 336 defer close(ch) 337 for { 338 ev, err := stream.Recv() 339 if err != nil { 340 if err != io.EOF { 341 ch <- &TaskEvent{ 342 Err: grpcutils.HandleReqCtxGrpcErr(err, reqCtx, d.doneCtx), 343 } 344 } 345 346 // End the stream 347 return 348 } 349 350 timestamp, _ := ptypes.Timestamp(ev.Timestamp) 351 event := &TaskEvent{ 352 TaskID: ev.TaskId, 353 AllocID: ev.AllocId, 354 TaskName: ev.TaskName, 355 Annotations: ev.Annotations, 356 Message: ev.Message, 357 Timestamp: timestamp, 358 } 359 select { 360 case <-reqCtx.Done(): 361 return 362 case ch <- event: 363 } 364 } 365 } 366 367 // SignalTask will send the given signal to the specified task 368 func (d *driverPluginClient) SignalTask(taskID string, signal string) error { 369 req := &proto.SignalTaskRequest{ 370 TaskId: taskID, 371 Signal: signal, 372 } 373 _, err := d.client.SignalTask(d.doneCtx, req) 374 return grpcutils.HandleGrpcErr(err, d.doneCtx) 375 } 376 377 // ExecTask will run the given command within the execution context of the task. 378 // The driver will wait for the given timeout for the command to complete before 379 // terminating it. The stdout and stderr of the command will be return to the caller, 380 // along with other exit information such as exit code. 381 func (d *driverPluginClient) ExecTask(taskID string, cmd []string, timeout time.Duration) (*ExecTaskResult, error) { 382 req := &proto.ExecTaskRequest{ 383 TaskId: taskID, 384 Command: cmd, 385 Timeout: ptypes.DurationProto(timeout), 386 } 387 388 resp, err := d.client.ExecTask(d.doneCtx, req) 389 if err != nil { 390 return nil, grpcutils.HandleGrpcErr(err, d.doneCtx) 391 } 392 393 result := &ExecTaskResult{ 394 Stdout: resp.Stdout, 395 Stderr: resp.Stderr, 396 ExitResult: exitResultFromProto(resp.Result), 397 } 398 399 return result, nil 400 } 401 402 var _ ExecTaskStreamingRawDriver = (*driverPluginClient)(nil) 403 404 func (d *driverPluginClient) ExecTaskStreamingRaw(ctx context.Context, 405 taskID string, 406 command []string, 407 tty bool, 408 execStream ExecTaskStream) error { 409 410 stream, err := d.client.ExecTaskStreaming(ctx) 411 if err != nil { 412 return grpcutils.HandleGrpcErr(err, d.doneCtx) 413 } 414 415 err = stream.Send(&proto.ExecTaskStreamingRequest{ 416 Setup: &proto.ExecTaskStreamingRequest_Setup{ 417 TaskId: taskID, 418 Command: command, 419 Tty: tty, 420 }, 421 }) 422 if err != nil { 423 return grpcutils.HandleGrpcErr(err, d.doneCtx) 424 } 425 426 errCh := make(chan error, 1) 427 428 go func() { 429 for { 430 m, err := execStream.Recv() 431 if err == io.EOF { 432 return 433 } else if err != nil { 434 errCh <- err 435 return 436 } 437 438 if err := stream.Send(m); err != nil { 439 errCh <- err 440 return 441 } 442 443 } 444 }() 445 446 for { 447 select { 448 case err := <-errCh: 449 return err 450 default: 451 } 452 453 m, err := stream.Recv() 454 if err == io.EOF { 455 // Once we get to the end of stream successfully, we can ignore errCh: 456 // e.g. input write failures after process terminates shouldn't cause method to fail 457 return nil 458 } else if err != nil { 459 return err 460 } 461 462 if err := execStream.Send(m); err != nil { 463 return err 464 } 465 } 466 } 467 468 func (d *driverPluginClient) CreateNetwork(allocID string) (*NetworkIsolationSpec, bool, error) { 469 req := &proto.CreateNetworkRequest{ 470 AllocId: allocID, 471 } 472 473 resp, err := d.client.CreateNetwork(d.doneCtx, req) 474 if err != nil { 475 return nil, false, grpcutils.HandleGrpcErr(err, d.doneCtx) 476 } 477 478 return NetworkIsolationSpecFromProto(resp.IsolationSpec), resp.Created, nil 479 } 480 481 func (d *driverPluginClient) DestroyNetwork(allocID string, spec *NetworkIsolationSpec) error { 482 req := &proto.DestroyNetworkRequest{ 483 AllocId: allocID, 484 IsolationSpec: NetworkIsolationSpecToProto(spec), 485 } 486 487 _, err := d.client.DestroyNetwork(d.doneCtx, req) 488 if err != nil { 489 return grpcutils.HandleGrpcErr(err, d.doneCtx) 490 } 491 492 return nil 493 }