github.com/demonoid81/moby@v0.0.0-20200517203328-62dd8e17c460/daemon/cluster/executor/container/controller.go

package container // import "github.com/demonoid81/moby/daemon/cluster/executor/container"

import (
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/demonoid81/moby/api/types"
	"github.com/demonoid81/moby/api/types/events"
	executorpkg "github.com/demonoid81/moby/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/demonoid81/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/pkg/errors"
	"golang.org/x/time/rate"
)

const defaultGossipConvergeDelay = 2 * time.Second

// waitNodeAttachmentsTimeout defines the total period of time we should wait
// for node attachments to be ready before giving up on starting a task
const waitNodeAttachmentsTimeout = 30 * time.Second

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task       *api.Task
	adapter    *containerAdapter
	closed     chan struct{}
	err        error
	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, i executorpkg.ImageBackend, v executorpkg.VolumeBackend, task *api.Task, node *api.NodeDescription, dependencies exec.DependencyGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, i, v, task, node, dependencies)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Before we create networks, we need to make sure that the node has all of
	// the network attachments that the task needs. This will block until that
	// is the case or the context has expired.
	// NOTE(dperny): Prepare doesn't time out on its own (that is, the context
	// passed in does not expire after any period of time), which means if the
	// node attachment never arrives (for example, if the network's IP address
	// space is exhausted), then the tasks on the node will park in PREPARING
	// forever (or until the node dies). To avoid this case, we create a new
	// context with a fixed deadline, and give up. In normal operation, a node
	// update with the node IP address should come in hot on the tail of the
	// task being assigned to the node, and this should exit on the order of
	// milliseconds, but to be extra conservative we'll give it 30 seconds to
	// time out before giving up.
	waitNodeAttachmentsContext, waitCancel := context.WithTimeout(ctx, waitNodeAttachmentsTimeout)
	defer waitCancel()
	if err := r.adapter.waitNodeAttachments(waitNodeAttachmentsContext); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}
	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}
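// The pull logic in Prepare follows a "start once, wait many" pattern: the
// first call forks the pull onto a background context, and every call,
// including re-entrant ones, waits on the same channel. A minimal
// self-contained sketch of the pattern (hypothetical names, not part of this
// controller's API):
func startOnceWaitMany(ctx context.Context, started *chan struct{}, result *error, work func(context.Context) error) error {
	if *started == nil {
		done := make(chan struct{})
		*started = done
		go func() {
			defer close(done)
			*result = work(context.Background()) // safe to read once done is closed
		}()
	}
	select {
	case <-ctx.Done():
		return ctx.Err() // this caller gave up; the forked work keeps running
	case <-*started:
		return *result
	}
}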
// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. Between the inspect and the start,
	// another caller could start the container, and we could end up starting
	// it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	var lnErr libnetwork.ErrNoSuchNetwork
	for {
		if err := r.adapter.start(ctx); err != nil {
			if errors.As(err, &lnErr) {
				// Retry network creation again if we
				// failed because some of the networks
				// were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}
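// The retry loop in Start only retries when errors.As identifies the failure
// as a missing network; everything else fails fast. A minimal sketch of that
// shape, with hypothetical start/createNetworks parameters standing in for
// the adapter calls:
func retryOnMissingNetwork(ctx context.Context, start, createNetworks func(context.Context) error) error {
	var lnErr libnetwork.ErrNoSuchNetwork
	for {
		err := start(ctx)
		if err == nil {
			return nil
		}
		if !errors.As(err, &lnErr) {
			return errors.Wrap(err, "starting container failed")
		}
		// Some networks were missing; recreate them and try the start again.
		if err := createNetworks(ctx); err != nil {
			return err
		}
	}
}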
// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	// healthErr is buffered so the health-check goroutine can report and exit
	// even if nothing ever receives from it.
	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	waitC, err := r.adapter.wait(ctx)
	if err != nil {
		return err
	}

	if status := <-waitC; status.ExitCode() != 0 {
		exitErr := &exitError{
			code: status.ExitCode(),
		}

		// Set the cause if it is knowable.
		select {
		case e := <-healthErr:
			exitErr.cause = e
		default:
			if status.Err() != nil {
				exitErr.cause = status.Err()
			}
		}

		return exitErr
	}

	return nil
}

func (r *controller) hasServiceBinding() bool {
	if r.task == nil {
		return false
	}

	// service is attached to a network besides the default bridge
	for _, na := range r.task.Networks {
		if na.Network == nil ||
			na.Network.DriverState == nil ||
			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
			continue
		}
		return true
	}

	return false
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if r.hasServiceBinding() {
		// remove container from service binding
		if err := r.adapter.deactivateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
			// Don't return an error here, because failure to deactivate
			// the service binding is expected if the container was never
			// started.
		}

		// add a delay for gossip convergence
		// TODO(dongluochen): this delay should be configurable to fit different
		// cluster sizes and network delays.
		time.Sleep(defaultGossipConvergeDelay)
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if !(isUnknownContainer(err) || isStoppedContainer(err)) {
			return err
		}
	}

	// Try removing the networks referenced in this task, in case this
	// task is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if !isUnknownContainer(err) {
			return err
		}
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}
// waitReady waits for a container to be "ready". Ready means it's past the
// started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}
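// waitReady subscribes to the event stream before inspecting, so a "start"
// event that fires between the subscription and the inspect is not lost. A
// minimal sketch of that subscribe-then-check ordering (hypothetical
// parameters standing in for the adapter calls):
func waitForCondition(ctx context.Context, subscribe func(context.Context) <-chan events.Message, satisfied func(context.Context) (bool, error), match func(events.Message) bool) error {
	eventq := subscribe(ctx)  // subscribe first ...
	ok, err := satisfied(ctx) // ... then check the current state
	if err != nil {
		return err
	}
	if ok {
		return nil
	}
	for {
		select {
		case event := <-eventq:
			if match(event) {
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}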
func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// if we're following, wait for this container to be ready. there is a
	// problem here: if the container will never be ready (for example, it has
	// been totally deleted) then this will wait forever. however, this doesn't
	// actually cause any UI issues, and shouldn't be a problem. the stuck wait
	// will go away when the follow (context) is canceled.
	if options.Follow {
		if err := r.waitReady(ctx); err != nil {
			return errors.Wrap(err, "container not ready for logs")
		}
	}
	// if we're not following, we don't wait for the container to be ready.
	// just call logs. if the container isn't ready, the call will fail and
	// return an error. no big deal, we don't care, we only want the logs we
	// can get RIGHT NOW with no follow.

	logsContext, cancel := context.WithCancel(ctx)
	msgs, err := r.adapter.logs(logsContext, options)
	defer cancel()
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}

	var (
		// use a rate limiter to keep things under control, and to provide
		// some ability to coalesce messages. this implements a "token bucket"
		// of size 10 MB, initially full and refilled at a rate of 10 MB of
		// tokens per second.
		limiter = rate.NewLimiter(10<<20, 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	for {
		msg, ok := <-msgs
		if !ok {
			// we're done here, no more messages
			return nil
		}

		if msg.Err != nil {
			// the deferred cancel closes the adapter's log stream
			return msg.Err
		}

		// wait here for the limiter to catch up
		if err := limiter.WaitN(ctx, len(msg.Line)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}
		tsp, err := gogotypes.TimestampProto(msg.Timestamp)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}
		var stream api.LogStream
		if msg.Source == "stdout" {
			stream = api.LogStreamStdout
		} else if msg.Source == "stderr" {
			stream = api.LogStreamStderr
		}

		// parse the details out of the Attrs map
		var attrs []api.LogAttr
		if len(msg.Attrs) != 0 {
			attrs = make([]api.LogAttr, 0, len(msg.Attrs))
			for _, attr := range msg.Attrs {
				attrs = append(attrs, api.LogAttr{Key: attr.Key, Value: attr.Value})
			}
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    stream,
			Attrs:     attrs,
			Data:      msg.Line,
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}
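// The limiter in Logs charges one token per byte of log line: with both the
// rate and the burst set to 10<<20, an initial 10 MB passes through
// immediately and throughput then settles at 10 MB/s. A minimal sketch of
// the same throttling in isolation (hypothetical function, not used by the
// controller):
func throttleBytes(ctx context.Context, lines [][]byte) error {
	limiter := rate.NewLimiter(10<<20, 10<<20) // rate: 10 MB/s, burst: 10 MB
	for _, line := range lines {
		// WaitN blocks until len(line) tokens are available or ctx is done.
		if err := limiter.WaitN(ctx, len(line)); err != nil {
			return err
		}
	}
	return nil
}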
// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}
	// we can't filter by id, since doing so is very likely to introduce a
	// deadlock. see #33377.
	return event.Actor.Attributes["name"] == r.adapter.container.name()
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		var protocol api.PortConfig_Protocol
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		case "sctp":
			protocol = api.ProtocolSCTP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}
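// Illustrative use of parsePortMap (hypothetical values): keys are
// "port/protocol" strings and each binding contributes one PortConfig, so a
// container publishing 8080/tcp on host port 30000 yields TargetPort 8080
// and PublishedPort 30000.
func examplePortMap() ([]*api.PortConfig, error) {
	portMap := nat.PortMap{
		"8080/tcp": []nat.PortBinding{{HostIP: "0.0.0.0", HostPort: "30000"}},
	}
	return parsePortMap(portMap)
}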
type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return e.code
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or the context
// is done.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}
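// Illustrative (hypothetical values): when an unhealthy container is shut
// down, Wait returns an exitError whose cause is the health failure, so
// callers see both the non-zero code and why the task was stopped.
func exampleExitError() string {
	err := &exitError{code: 137, cause: ErrContainerUnhealthy}
	return err.Error() // "task: non-zero exit (137): ..."
}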