package container // import "github.com/Prakhar-Agarwal-byte/moby/daemon/cluster/executor/container"

import (
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/Prakhar-Agarwal-byte/moby/api/types"
	"github.com/Prakhar-Agarwal-byte/moby/api/types/events"
	executorpkg "github.com/Prakhar-Agarwal-byte/moby/daemon/cluster/executor"
	"github.com/Prakhar-Agarwal-byte/moby/libnetwork"
	"github.com/docker/go-connections/nat"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/moby/swarmkit/v2/agent/exec"
	"github.com/moby/swarmkit/v2/api"
	"github.com/moby/swarmkit/v2/log"
	"github.com/pkg/errors"
	"golang.org/x/time/rate"
)

const defaultGossipConvergeDelay = 2 * time.Second

// waitNodeAttachmentsTimeout defines the total period of time we should wait
// for node attachments to be ready before giving up on starting a task
const waitNodeAttachmentsTimeout = 30 * time.Second

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task       *api.Task
	adapter    *containerAdapter
	closed     chan struct{}
	err        error
	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, i executorpkg.ImageBackend, v executorpkg.VolumeBackend, task *api.Task, node *api.NodeDescription, dependencies exec.DependencyGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, i, v, task, node, dependencies)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
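//
// Before creating the container, Prepare waits for the node's network
// attachments and cluster volumes to become available, creates the task's
// networks and volumes, and pulls the image (unless the daemon's environment
// sets DOCKER_SERVICE_PREFER_OFFLINE_IMAGE=1, in which case the pull is
// skipped).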
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Before we create networks, we need to make sure that the node has all of
	// the network attachments that the task needs. This will block until that
	// is the case or the context has expired.
	// NOTE(dperny): Prepare doesn't time out on its own (that is, the context
	// passed in does not expire after any period of time), which means if the
	// node attachment never arrives (for example, if the network's IP address
	// space is exhausted), then the tasks on the node will park in PREPARING
	// forever (or until the node dies). To avoid this case, we create a new
	// context with a fixed deadline, and give up. In normal operation, a node
	// update with the node IP address should come in hot on the tail of the
	// task being assigned to the node, and this should exit on the order of
	// milliseconds, but to be extra conservative we'll give it 30 seconds to
	// time out before giving up.
	waitNodeAttachmentsContext, waitCancel := context.WithTimeout(ctx, waitNodeAttachmentsTimeout)
	defer waitCancel()
	if err := r.adapter.waitNodeAttachments(waitNodeAttachmentsContext); err != nil {
		return err
	}

	// could take a while for the cluster volumes to become available. set for
	// 5 minutes, I guess?
	// TODO(dperny): do this more intelligently. return a better error.
	waitClusterVolumesCtx, wcvcancel := context.WithTimeout(ctx, 5*time.Minute)
	defer wcvcancel()
	if err := r.adapter.waitClusterVolumes(waitClusterVolumesCtx); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}
	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	var lnErr libnetwork.ErrNoSuchNetwork
	for {
		if err := r.adapter.start(ctx); err != nil {
			if errors.As(err, &lnErr) {
				// Retry network creation again if we
				// failed because some of the networks
				// were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case events.ActionDie: // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case events.ActionDestroy:
				// If we get here, something has gone wrong but we want to exit
				// and report anyways.
				return ErrContainerDestroyed
			case events.ActionHealthStatusUnhealthy:
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case events.ActionHealthStatusHealthy:
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
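//
// If the container exits with a non-zero code, Wait returns an *exitError
// carrying that code; the cause is set to ErrContainerUnhealthy when the
// container was stopped for failing its health check, or to the wait error
// when one is reported.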
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	waitC, err := r.adapter.wait(ctx)
	if err != nil {
		return err
	}

	if status := <-waitC; status.ExitCode() != 0 {
		exitErr := &exitError{
			code: status.ExitCode(),
		}

		// Set the cause if it is knowable.
		select {
		case e := <-healthErr:
			exitErr.cause = e
		default:
			if status.Err() != nil {
				exitErr.cause = status.Err()
			}
		}

		return exitErr
	}

	return nil
}

func (r *controller) hasServiceBinding() bool {
	if r.task == nil {
		return false
	}

	// service is attached to a network besides the default bridge
	for _, na := range r.task.Networks {
		if na.Network == nil ||
			na.Network.DriverState == nil ||
			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
			continue
		}
		return true
	}

	return false
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if r.hasServiceBinding() {
		// remove container from service binding
		if err := r.adapter.deactivateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
			// Don't return an error here, because failure to deactivate
			// the service binding is expected if the container was never
			// started.
		}

		// add a delay for gossip converge
		// TODO(dongluochen): this delay should be configurable to fit different cluster size and network delay.
		time.Sleep(defaultGossipConvergeDelay)
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if !(isUnknownContainer(err) || isStoppedContainer(err)) {
			return err
		}
	}

	// Try removing networks referenced in this task in case this
	// task is the last one referencing it
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if !isUnknownContainer(err) {
			return err
		}
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// if we're following, wait for this container to be ready. there is a
	// problem here: if the container will never be ready (for example, it has
	// been totally deleted) then this will wait forever. however, this doesn't
	// actually cause any UI issues, and shouldn't be a problem. the stuck wait
	// will go away when the follow (context) is canceled.
	if options.Follow {
		if err := r.waitReady(ctx); err != nil {
			return errors.Wrap(err, "container not ready for logs")
		}
	}
	// if we're not following, we're not gonna wait for the container to be
	// ready. just call logs. if the container isn't ready, the call will fail
	// and return an error. no big deal, we don't care, we only want the logs
	// we can get RIGHT NOW with no follow

	logsContext, cancel := context.WithCancel(ctx)
	msgs, err := r.adapter.logs(logsContext, options)
	defer cancel()
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}

	var (
		// use a rate limiter to keep things under control but also to provide
		// some ability to coalesce messages.
		// this implements a "token bucket" of size 10 MB, initially full and
		// refilled at a rate of 10 MB per second.
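		// (In golang.org/x/time/rate, NewLimiter takes the refill rate and the
		// burst size; 10<<20 is 10 MiB, so both are set to roughly 10 MB here,
		// and each token corresponds to one byte of log line.)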
		limiter = rate.NewLimiter(10<<20, 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	for {
		msg, ok := <-msgs
		if !ok {
			// we're done here, no more messages
			return nil
		}

		if msg.Err != nil {
			// the deferred cancel closes the adapter's log stream
			return msg.Err
		}

		// wait here for the limiter to catch up
		if err := limiter.WaitN(ctx, len(msg.Line)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}
		tsp, err := gogotypes.TimestampProto(msg.Timestamp)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}
		var stream api.LogStream
		if msg.Source == "stdout" {
			stream = api.LogStreamStdout
		} else if msg.Source == "stderr" {
			stream = api.LogStreamStderr
		}

		// parse the details out of the Attrs map
		var attrs []api.LogAttr
		if len(msg.Attrs) != 0 {
			attrs = make([]api.LogAttr, 0, len(msg.Attrs))
			for _, attr := range msg.Attrs {
				attrs = append(attrs, api.LogAttr{Key: attr.Key, Value: attr.Value})
			}
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    stream,
			Attrs:     attrs,
			Data:      msg.Line,
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}
	// we can't filter using the container ID since that has a high chance of
	// introducing a deadlock; see #33377.
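	// The adapter's container name is unique to the task (see the controller
	// doc comment above), so matching on the "name" attribute is unambiguous.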
	return event.Actor.Attributes["name"] == r.adapter.container.name()
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		p, proto, ok := strings.Cut(string(portProtocol), "/")
		if !ok {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(p, 10, 16)
		if err != nil {
			return nil, err
		}

		var protocol api.PortConfig_Protocol
		switch strings.ToLower(proto) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		case "sctp":
			protocol = api.ProtocolSCTP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", proto)
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return e.code
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or the context
// is done.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case events.ActionHealthStatusUnhealthy:
				return ErrContainerUnhealthy
			}
		}
	}
}