github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/exec/dockerapi/controller.go

package dockerapi

import (
	"bufio"
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	engineapi "github.com/docker/docker/client"
	"github.com/docker/go-connections/nat"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/pkg/errors"
	"golang.org/x/time/rate"
)

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, protected by close of pulled
}

var _ exec.Controller = &controller{}

// newController returns a docker exec controller for the provided task.
func newController(client engineapi.APIClient, nodeDescription *api.NodeDescription, task *api.Task, secrets exec.SecretGetter) (exec.Controller, error) {
	adapter, err := newContainerAdapter(client, nodeDescription, task, secrets)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}
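
// The swarm agent drives this controller through the exec.Controller
// interface asserted above, roughly in the order Prepare, Start, Wait, then
// Shutdown or Terminate, Remove and Close. The runTask helper below is a
// hypothetical, happy-path illustration of that call order; it is not part
// of this package.
//
//	func runTask(ctx context.Context, ctlr exec.Controller) error {
//		if err := ctlr.Prepare(ctx); err != nil && err != exec.ErrTaskPrepared {
//			return err
//		}
//		if err := ctlr.Start(ctx); err != nil && err != exec.ErrTaskStarted {
//			return err
//		}
//		return ctlr.Wait(ctx)
//	}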

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	log.G(ctx).Warnf("task updates not yet supported")
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if r.pulled == nil {
		// Launch a re-entrant pull operation associated with the controller,
		// dissociating the context from the caller's context. This allows the
		// pull operation to be re-entrant on calls to Prepare, resuming from
		// the same point after cancellation.
		var pctx context.Context

		r.pulled = make(chan struct{})
		pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

		go func() {
			defer close(r.pulled)
			r.pullErr = r.adapter.pullImage(pctx)
		}()
	}

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-r.pulled:
		if r.pullErr != nil {
			// NOTE(stevvooe): We always try to pull the image to make sure we have
			// the most up to date version. This will return an error, but we only
			// log it. If the image truly doesn't exist, the create below will
			// error out.
			//
			// This gives us some nice behavior where we use up to date versions of
			// mutable tags, but will still run if the old image is available but a
			// registry is down.
			//
			// If you don't want this behavior, lock down your image to an
			// immutable tag or digest.
			log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	if err := r.adapter.start(ctx); err != nil {
		return errors.Wrap(err, "starting container failed")
	}
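
	// From here on, if the container defines a health check, Start blocks on
	// the engine's event stream until the container is reported healthy or
	// reaches a terminal state. A health check that was explicitly disabled
	// (HEALTHCHECK NONE in the Dockerfile, or docker run --no-healthcheck)
	// typically surfaces as Healthcheck.Test == ["NONE"], which is why the
	// first element of the test command is compared against "NONE" below.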

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil {
		return nil
	}

	healthCmd := ctnr.Config.Healthcheck.Test

	if len(healthCmd) == 0 {
		// This field should be filled in, even when inherited from the image.
		// If it's empty, the health check would stay in the starting state
		// forever, so treat it as no health check and return directly.
		return nil
	}

	// health check is disabled
	if healthCmd[0] == "NONE" {
		return nil
	}

	// wait for container to be healthy
	eventq, closed, err := r.adapter.events(ctx)
	if err != nil {
		return err
	}
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				}

				return makeExitError(ctnr)
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed

			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				// TODO(runshenzhu): double check if it can cause a deadlock issue here
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				return ErrContainerUnhealthy

			case "health_status: healthy":
				return nil
			}
		case <-closed:
			// restart!
			eventq, closed, err = r.adapter.events(ctx)
			if err != nil {
				return err
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}
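
// Wait below follows the same event-handling pattern as the health-check
// loop in Start: adapter.events is assumed to return an event channel plus a
// channel that is closed when the engine's event stream ends, and whenever
// that happens the loop simply re-subscribes (the "restart!" cases) instead
// of giving up on the wait.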

// Wait on the container to exit.
func (r *controller) Wait(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// check the initial state and report that.
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return errors.Wrap(err, "inspecting container failed")
	}

	switch ctnr.State.Status {
	case "exited", "dead":
		// TODO(stevvooe): Treating container status dead as exited. There may
		// be more to do if we have dead containers. Note that this is not the
		// same as task state DEAD, which means the container is completely
		// freed on a node.

		return makeExitError(ctnr)
	}

	eventq, closed, err := r.adapter.events(ctx)
	if err != nil {
		return err
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				}

				return makeExitError(ctnr)
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed

			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				// TODO(runshenzhu): double check if it can cause a deadlock issue here
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				return ErrContainerUnhealthy
			}
		case <-closed:
			// restart!
			eventq, closed, err = r.adapter.events(ctx)
			if err != nil {
				return err
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing networks referenced in this task in case this
	// task is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq, closed, err := r.adapter.events(ctx)
	if err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-closed:
			// restart!
			eventq, closed, err = r.adapter.events(ctx)
			if err != nil {
				return err
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if err := r.waitReady(ctx); err != nil {
		return errors.Wrap(err, "container not ready for logs")
	}

	rc, err := r.adapter.logs(ctx, options)
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}
	defer rc.Close()

	var (
		// use a rate limiter to keep things under control, but it also
		// provides some ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	brd := bufio.NewReader(rc)
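
	// Each frame in the stream follows Docker's stdcopy framing (the format
	// the Engine uses for log streams of containers without a TTY), assuming
	// adapter.logs hands back the raw multiplexed stream. The 8-byte frame
	// header looks like:
	//
	//	[0]   stream type (0 = stdin, 1 = stdout, 2 = stderr)
	//	[1:4] padding (always zero)
	//	[4:8] payload size, big-endian uint32
	//
	// Reading the header as one big-endian uint64 therefore leaves the stream
	// type in the most significant byte, and masking that byte off yields the
	// payload size, because the padding bytes are zero.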
	for {
		// so, message header is 8 bytes, treat as uint64, pull stream off MSB
		var header uint64
		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
			if err == io.EOF {
				return nil
			}

			return errors.Wrap(err, "failed reading log header")
		}

		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))

		// limit here to decrease allocation back pressure.
		if err := limiter.WaitN(ctx, int(size)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}

		buf := make([]byte, size)
		_, err := io.ReadFull(brd, buf)
		if err != nil {
			return errors.Wrap(err, "failed reading buffer")
		}

		// Timestamp is RFC3339Nano with 1 space after. Lop, parse, publish.
		parts := bytes.SplitN(buf, []byte(" "), 2)
		if len(parts) != 2 {
			return fmt.Errorf("invalid timestamp in log message: %v", buf)
		}

		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
		if err != nil {
			return errors.Wrap(err, "failed to parse timestamp")
		}

		tsp, err := gogotypes.TimestampProto(ts)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    api.LogStream(stream),

			Data: parts[1],
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

// Close the controller and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

type exitError struct {
	code            int
	cause           error
	containerStatus *api.ContainerStatus
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return int(e.containerStatus.ExitCode)
}

func (e *exitError) Cause() error {
	return e.cause
}

func makeExitError(ctnr types.ContainerJSON) error {
	if ctnr.State.ExitCode != 0 {
		var cause error
		if ctnr.State.Error != "" {
			cause = errors.New(ctnr.State.Error)
		}

		cstatus, _ := parseContainerStatus(ctnr)
		return &exitError{
			code:            ctnr.State.ExitCode,
			cause:           cause,
			containerStatus: cstatus,
		}
	}

	return nil
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}
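
// parsePortMap converts the engine's nat.PortMap, whose keys take the form
// "<port>/<protocol>" (for example "8080/tcp") and whose values are the host
// bindings, into one swarmkit PortConfig per binding. As an illustration,
// container port 8080/tcp published on host port 30000 comes back as
// PublishMode: host, Protocol: TCP, TargetPort: 8080, PublishedPort: 30000;
// the concrete numbers are only an example.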
func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		var protocol api.PortConfig_Protocol
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		case "sctp":
			protocol = api.ProtocolSCTP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}