github.com/zhouyu0/docker-note@v0.0.0-20190722021225-b8d3825084db/daemon/cluster/executor/container/controller.go (about) 1 package container // import "github.com/docker/docker/daemon/cluster/executor/container" 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "strconv" 8 "strings" 9 "time" 10 11 "github.com/docker/docker/api/types" 12 "github.com/docker/docker/api/types/events" 13 executorpkg "github.com/docker/docker/daemon/cluster/executor" 14 "github.com/docker/go-connections/nat" 15 "github.com/docker/libnetwork" 16 "github.com/docker/swarmkit/agent/exec" 17 "github.com/docker/swarmkit/api" 18 "github.com/docker/swarmkit/log" 19 gogotypes "github.com/gogo/protobuf/types" 20 "github.com/pkg/errors" 21 "golang.org/x/time/rate" 22 ) 23 24 const defaultGossipConvergeDelay = 2 * time.Second 25 26 // waitNodeAttachmentsTimeout defines the total period of time we should wait 27 // for node attachments to be ready before giving up on starting a task 28 const waitNodeAttachmentsTimeout = 30 * time.Second 29 30 // controller implements agent.Controller against docker's API. 31 // 32 // Most operations against docker's API are done through the container name, 33 // which is unique to the task. 34 type controller struct { 35 task *api.Task 36 adapter *containerAdapter 37 closed chan struct{} 38 err error 39 pulled chan struct{} // closed after pull 40 cancelPull func() // cancels pull context if not nil 41 pullErr error // pull error, only read after pulled closed 42 } 43 44 var _ exec.Controller = &controller{} 45 46 // NewController returns a docker exec runner for the provided task. 
func newController(b executorpkg.Backend, i executorpkg.ImageBackend, v executorpkg.VolumeBackend, task *api.Task, node *api.NodeDescription, dependencies exec.DependencyGetter) (*controller, error) {
	// The adapter wraps all backend access for this task; a failure to build
	// it is returned to the caller unchanged.
	adapter, err := newContainerAdapter(b, i, v, task, node, dependencies)
	if err != nil {
		return nil, err
	}

	// pulled/cancelPull/pullErr are left at their zero values; Prepare sets
	// them up lazily on the first pull.
	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

// Task returns the task this controller was created for. The returned error
// is always nil.
func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
//
// If the container is unknown, both the status and the error are nil.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

// PortStatus returns the published-port status parsed from the container's
// network settings.
//
// If the container is unknown, both the status and the error are nil.
func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
//
// It is currently a no-op that always returns nil.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Before we create networks, we need to make sure that the node has all of
	// the network attachments that the task needs. This will block until that
	// is the case or the context has expired.
	// NOTE(dperny): Prepare doesn't time out on its own (that is, the context
	// passed in does not expire after any period of time), which means if the
	// node attachment never arrives (for example, if the network's IP address
	// space is exhausted), then the tasks on the node will park in PREPARING
	// forever (or until the node dies). To avoid this case, we create a new
	// context with a fixed deadline, and give up. In normal operation, a node
	// update with the node IP address should come in hot on the tail of the
	// task being assigned to the node, and this should exit on the order of
	// milliseconds, but to be extra conservative we'll give it 30 seconds to
	// time out before giving up.
	waitNodeAttachmentsContext, waitCancel := context.WithTimeout(ctx, waitNodeAttachmentsTimeout)
	defer waitCancel()
	if err := r.adapter.waitNodeAttachments(waitNodeAttachmentsContext); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	// The pull is skipped entirely when DOCKER_SERVICE_PREFER_OFFLINE_IMAGE
	// is set to "1" in the environment.
	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		// Wait for the pull to finish, or give up when the caller's context
		// ends. The forked pull itself keeps running in the latter case.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}
	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			// A name conflict means a container with this task's name
			// already exists; confirm it can be inspected before reporting
			// the task as prepared.
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
//
// exec.ErrTaskStarted is returned when the container's status is anything
// other than "created". If the container has a health check configured, Start
// blocks until the container reports healthy, dies, is destroyed, the context
// ends, or the controller is closed.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := errors.Cause(err).(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation again if we
				// failed because some of the networks
				// were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// No health check is configured (or it is explicitly disabled with
	// "NONE"): activate the service binding right away and return.
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	// healthErr is set once an unhealthy event has been handled; it becomes
	// the cause of the exitError reported on the subsequent "die" event.
	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyways.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
//
// A non-zero exit code is reported as an *exitError. Its cause is
// ErrContainerUnhealthy when the health watcher reported first, otherwise the
// wait status error, if any. A zero exit code yields nil.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	// Buffered so the health goroutine can deliver its result and move on
	// even if nobody is receiving yet.
	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	waitC, err := r.adapter.wait(ctx)
	if err != nil {
		return err
	}

	if status := <-waitC; status.ExitCode() != 0 {
		exitErr := &exitError{
			code: status.ExitCode(),
		}

		// Set the cause if it is knowable.
		select {
		case e := <-healthErr:
			exitErr.cause = e
		default:
			if status.Err() != nil {
				exitErr.cause = status.Err()
			}
		}

		return exitErr
	}

	return nil
}

// hasServiceBinding reports whether the task has at least one network
// attachment other than the default bridge.
//
// Attachments with a nil Network or nil DriverState are skipped, as are those
// whose driver name and annotated network name are both "bridge".
func (r *controller) hasServiceBinding() bool {
	if r.task == nil {
		return false
	}

	// service is attached to a network besides the default bridge
	for _, na := range r.task.Networks {
		if na.Network == nil ||
			na.Network.DriverState == nil ||
			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
			continue
		}
		return true
	}

	return false
}

// Shutdown the container cleanly.
//
// Any in-flight image pull is cancelled first. "Unknown container" and
// "stopped container" errors from the adapter's shutdown are ignored, as are
// "unknown container" errors from network removal.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if r.hasServiceBinding() {
		// remove container from service binding
		if err := r.adapter.deactivateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
			// Don't return an error here, because failure to deactivate
			// the service binding is expected if the container was never
			// started.
		}

		// add a delay for gossip converge
		// TODO(dongluochen): this delay should be configurable to fit different cluster size and network delay.
		// NOTE(review): this sleep does not observe ctx cancellation.
		time.Sleep(defaultGossipConvergeDelay)
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if !(isUnknownContainer(err) || isStoppedContainer(err)) {
			return err
		}
	}

	// Try removing networks referenced in this task in case this
	// task is the last one referencing it
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if !isUnknownContainer(err) {
			return err
		}
	}

	return nil
}

// Terminate the container, with force.
389 func (r *controller) Terminate(ctx context.Context) error { 390 if err := r.checkClosed(); err != nil { 391 return err 392 } 393 394 if r.cancelPull != nil { 395 r.cancelPull() 396 } 397 398 if err := r.adapter.terminate(ctx); err != nil { 399 if isUnknownContainer(err) { 400 return nil 401 } 402 403 return err 404 } 405 406 return nil 407 } 408 409 // Remove the container and its resources. 410 func (r *controller) Remove(ctx context.Context) error { 411 if err := r.checkClosed(); err != nil { 412 return err 413 } 414 415 if r.cancelPull != nil { 416 r.cancelPull() 417 } 418 419 // It may be necessary to shut down the task before removing it. 420 if err := r.Shutdown(ctx); err != nil { 421 if isUnknownContainer(err) { 422 return nil 423 } 424 // This may fail if the task was already shut down. 425 log.G(ctx).WithError(err).Debug("shutdown failed on removal") 426 } 427 428 if err := r.adapter.remove(ctx); err != nil { 429 if isUnknownContainer(err) { 430 return nil 431 } 432 433 return err 434 } 435 return nil 436 } 437 438 // waitReady waits for a container to be "ready". 439 // Ready means it's past the started state. 
440 func (r *controller) waitReady(pctx context.Context) error { 441 if err := r.checkClosed(); err != nil { 442 return err 443 } 444 445 ctx, cancel := context.WithCancel(pctx) 446 defer cancel() 447 448 eventq := r.adapter.events(ctx) 449 450 ctnr, err := r.adapter.inspect(ctx) 451 if err != nil { 452 if !isUnknownContainer(err) { 453 return errors.Wrap(err, "inspect container failed") 454 } 455 } else { 456 switch ctnr.State.Status { 457 case "running", "exited", "dead": 458 return nil 459 } 460 } 461 462 for { 463 select { 464 case event := <-eventq: 465 if !r.matchevent(event) { 466 continue 467 } 468 469 switch event.Action { 470 case "start": 471 return nil 472 } 473 case <-ctx.Done(): 474 return ctx.Err() 475 case <-r.closed: 476 return r.err 477 } 478 } 479 } 480 481 func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error { 482 if err := r.checkClosed(); err != nil { 483 return err 484 } 485 486 // if we're following, wait for this container to be ready. there is a 487 // problem here: if the container will never be ready (for example, it has 488 // been totally deleted) then this will wait forever. however, this doesn't 489 // actually cause any UI issues, and shouldn't be a problem. the stuck wait 490 // will go away when the follow (context) is canceled. 491 if options.Follow { 492 if err := r.waitReady(ctx); err != nil { 493 return errors.Wrap(err, "container not ready for logs") 494 } 495 } 496 // if we're not following, we're not gonna wait for the container to be 497 // ready. just call logs. if the container isn't ready, the call will fail 498 // and return an error. 
no big deal, we don't care, we only want the logs 499 // we can get RIGHT NOW with no follow 500 501 logsContext, cancel := context.WithCancel(ctx) 502 msgs, err := r.adapter.logs(logsContext, options) 503 defer cancel() 504 if err != nil { 505 return errors.Wrap(err, "failed getting container logs") 506 } 507 508 var ( 509 // use a rate limiter to keep things under control but also provides some 510 // ability coalesce messages. 511 limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s 512 msgctx = api.LogContext{ 513 NodeID: r.task.NodeID, 514 ServiceID: r.task.ServiceID, 515 TaskID: r.task.ID, 516 } 517 ) 518 519 for { 520 msg, ok := <-msgs 521 if !ok { 522 // we're done here, no more messages 523 return nil 524 } 525 526 if msg.Err != nil { 527 // the defered cancel closes the adapter's log stream 528 return msg.Err 529 } 530 531 // wait here for the limiter to catch up 532 if err := limiter.WaitN(ctx, len(msg.Line)); err != nil { 533 return errors.Wrap(err, "failed rate limiter") 534 } 535 tsp, err := gogotypes.TimestampProto(msg.Timestamp) 536 if err != nil { 537 return errors.Wrap(err, "failed to convert timestamp") 538 } 539 var stream api.LogStream 540 if msg.Source == "stdout" { 541 stream = api.LogStreamStdout 542 } else if msg.Source == "stderr" { 543 stream = api.LogStreamStderr 544 } 545 546 // parse the details out of the Attrs map 547 var attrs []api.LogAttr 548 if len(msg.Attrs) != 0 { 549 attrs = make([]api.LogAttr, 0, len(msg.Attrs)) 550 for _, attr := range msg.Attrs { 551 attrs = append(attrs, api.LogAttr{Key: attr.Key, Value: attr.Value}) 552 } 553 } 554 555 if err := publisher.Publish(ctx, api.LogMessage{ 556 Context: msgctx, 557 Timestamp: tsp, 558 Stream: stream, 559 Attrs: attrs, 560 Data: msg.Line, 561 }); err != nil { 562 return errors.Wrap(err, "failed to publish log message") 563 } 564 } 565 } 566 567 // Close the runner and clean up any ephemeral resources. 
568 func (r *controller) Close() error { 569 select { 570 case <-r.closed: 571 return r.err 572 default: 573 if r.cancelPull != nil { 574 r.cancelPull() 575 } 576 577 r.err = exec.ErrControllerClosed 578 close(r.closed) 579 } 580 return nil 581 } 582 583 func (r *controller) matchevent(event events.Message) bool { 584 if event.Type != events.ContainerEventType { 585 return false 586 } 587 // we can't filter using id since it will have huge chances to introduce a deadlock. see #33377. 588 return event.Actor.Attributes["name"] == r.adapter.container.name() 589 } 590 591 func (r *controller) checkClosed() error { 592 select { 593 case <-r.closed: 594 return r.err 595 default: 596 return nil 597 } 598 } 599 600 func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) { 601 status := &api.ContainerStatus{ 602 ContainerID: ctnr.ID, 603 PID: int32(ctnr.State.Pid), 604 ExitCode: int32(ctnr.State.ExitCode), 605 } 606 607 return status, nil 608 } 609 610 func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) { 611 status := &api.PortStatus{} 612 613 if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 { 614 exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports) 615 if err != nil { 616 return nil, err 617 } 618 status.Ports = exposedPorts 619 } 620 621 return status, nil 622 } 623 624 func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) { 625 exposedPorts := make([]*api.PortConfig, 0, len(portMap)) 626 627 for portProtocol, mapping := range portMap { 628 parts := strings.SplitN(string(portProtocol), "/", 2) 629 if len(parts) != 2 { 630 return nil, fmt.Errorf("invalid port mapping: %s", portProtocol) 631 } 632 633 port, err := strconv.ParseUint(parts[0], 10, 16) 634 if err != nil { 635 return nil, err 636 } 637 638 protocol := api.ProtocolTCP 639 switch strings.ToLower(parts[1]) { 640 case "tcp": 641 protocol = api.ProtocolTCP 642 case "udp": 643 protocol = api.ProtocolUDP 644 case "sctp": 645 
protocol = api.ProtocolSCTP 646 default: 647 return nil, fmt.Errorf("invalid protocol: %s", parts[1]) 648 } 649 650 for _, binding := range mapping { 651 hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16) 652 if err != nil { 653 return nil, err 654 } 655 656 // TODO(aluzzardi): We're losing the port `name` here since 657 // there's no way to retrieve it back from the Engine. 658 exposedPorts = append(exposedPorts, &api.PortConfig{ 659 PublishMode: api.PublishModeHost, 660 Protocol: protocol, 661 TargetPort: uint32(port), 662 PublishedPort: uint32(hostPort), 663 }) 664 } 665 } 666 667 return exposedPorts, nil 668 } 669 670 type exitError struct { 671 code int 672 cause error 673 } 674 675 func (e *exitError) Error() string { 676 if e.cause != nil { 677 return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause) 678 } 679 680 return fmt.Sprintf("task: non-zero exit (%v)", e.code) 681 } 682 683 func (e *exitError) ExitCode() int { 684 return e.code 685 } 686 687 func (e *exitError) Cause() error { 688 return e.cause 689 } 690 691 // checkHealth blocks until unhealthy container is detected or ctx exits 692 func (r *controller) checkHealth(ctx context.Context) error { 693 eventq := r.adapter.events(ctx) 694 695 for { 696 select { 697 case <-ctx.Done(): 698 return nil 699 case <-r.closed: 700 return nil 701 case event := <-eventq: 702 if !r.matchevent(event) { 703 continue 704 } 705 706 switch event.Action { 707 case "health_status: unhealthy": 708 return ErrContainerUnhealthy 709 } 710 } 711 } 712 }