github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/cluster/executor/container/controller.go

package container

import (
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

const defaultGossipConvergeDelay = 2 * time.Second

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, dependencies exec.DependencyGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, dependencies)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

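// For illustration only: the pull step above is skipped entirely when the
// daemon process sees DOCKER_SERVICE_PREFER_OFFLINE_IMAGE=1 in its
// environment, for example (how the variable reaches the daemon is
// deployment-specific and not defined in this file):
//
//	DOCKER_SERVICE_PREFER_OFFLINE_IMAGE=1 dockerd
//
// In that mode Prepare relies on the image already being present locally and
// goes straight from network and volume setup to container creation.
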
// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation if we failed because some of
				// the networks were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyways.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

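// Start only enters the event loop above when the container was created with
// a health check. A config roughly like the following (field names from the
// engine's container.HealthConfig type; the values are hypothetical) would
// make Start wait for a "health_status: healthy" event before activating the
// service binding:
//
//	&container.HealthConfig{
//		Test:     []string{"CMD-SHELL", "curl -f http://localhost/ || exit 1"},
//		Interval: 30 * time.Second,
//		Retries:  3,
//	}
//
// With no Test, an empty Test, or Test[0] == "NONE", the binding is activated
// as soon as the start call succeeds.
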
// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	waitC, err := r.adapter.wait(ctx)
	if err != nil {
		return err
	}

	if status := <-waitC; status.ExitCode() != 0 {
		exitErr := &exitError{
			code: status.ExitCode(),
		}

		// Set the cause if it is knowable.
		select {
		case e := <-healthErr:
			exitErr.cause = e
		default:
			if status.Err() != nil {
				exitErr.cause = status.Err()
			}
		}

		return exitErr
	}

	return nil
}

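// To make the interplay above concrete: when checkHealth reports the
// container unhealthy, Wait triggers a shutdown, the container then exits
// with a non-zero status, and the caller receives an error shaped roughly
// like
//
//	&exitError{code: 137, cause: ErrContainerUnhealthy}
//
// where 137 is only an illustrative exit code; the real value is whatever the
// container reports. A clean zero exit returns nil even if a health error was
// buffered.
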
func (r *controller) hasServiceBinding() bool {
	if r.task == nil {
		return false
	}

	// service is attached to a network besides the default bridge
	for _, na := range r.task.Networks {
		if na.Network == nil ||
			na.Network.DriverState == nil ||
			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
			continue
		}
		return true
	}

	return false
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if r.hasServiceBinding() {
		// remove container from service binding
		if err := r.adapter.deactivateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
			// Don't return an error here, because failure to deactivate
			// the service binding is expected if the container was never
			// started.
		}

		// add a delay for gossip converge
		// TODO(dongluochen): this delay should be configurable to fit different cluster sizes and network delays.
		time.Sleep(defaultGossipConvergeDelay)
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing networks referenced in this task in case this
	// task is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

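// Putting the three teardown paths side by side: Shutdown stops the container
// cleanly and leaves it (and its networks) in place, Terminate kills it with
// force, and Remove performs the full cleanup, roughly equivalent to the
// following sequence with error handling omitted:
//
//	_ = r.Shutdown(ctx)               // stop the container, drop any service binding
//	_ = r.adapter.removeNetworks(ctx) // remove task networks if nothing else references them
//	_ = r.adapter.remove(ctx)         // delete the container itself
//
// The sleep in Shutdown is there to give the service-binding removal time to
// propagate through network gossip before the container actually stops; for
// now the delay is the fixed defaultGossipConvergeDelay noted in the TODO
// above.
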
// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// if we're following, wait for this container to be ready. there is a
	// problem here: if the container will never be ready (for example, it has
	// been totally deleted) then this will wait forever. however, this doesn't
	// actually cause any UI issues, and shouldn't be a problem. the stuck wait
	// will go away when the follow (context) is canceled.
	if options.Follow {
		if err := r.waitReady(ctx); err != nil {
			return errors.Wrap(err, "container not ready for logs")
		}
	}
	// if we're not following, we're not gonna wait for the container to be
	// ready. just call logs. if the container isn't ready, the call will fail
	// and return an error. no big deal, we don't care, we only want the logs
	// we can get RIGHT NOW with no follow

	logsContext, cancel := context.WithCancel(ctx)
	msgs, err := r.adapter.logs(logsContext, options)
	defer cancel()
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}

	var (
		// use a rate limiter to keep things under control but also provide
		// some ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	for {
		msg, ok := <-msgs
		if !ok {
			// we're done here, no more messages
			return nil
		}

		if msg.Err != nil {
			// the deferred cancel closes the adapter's log stream
			return msg.Err
		}

		// wait here for the limiter to catch up
		if err := limiter.WaitN(ctx, len(msg.Line)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}
		tsp, err := gogotypes.TimestampProto(msg.Timestamp)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}
		var stream api.LogStream
		if msg.Source == "stdout" {
			stream = api.LogStreamStdout
		} else if msg.Source == "stderr" {
			stream = api.LogStreamStderr
		}

		// parse the details out of the Attrs map
		attrs := []api.LogAttr{}
		for k, v := range msg.Attrs {
			attr := api.LogAttr{Key: k, Value: v}
			attrs = append(attrs, attr)
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    stream,
			Attrs:     attrs,
			Data:      msg.Line,
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

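// The limiter in Logs charges one token per byte of log line via WaitN, so
// large bursts of output are paced rather than dropped. A minimal standalone
// sketch of the same pattern, with illustrative values and helper names that
// are not taken from this file:
//
//	lim := rate.NewLimiter(rate.Limit(1<<20), 1<<20) // ~1 MB/s steady rate, 1 MB burst
//	for _, line := range lines {
//		if err := lim.WaitN(ctx, len(line)); err != nil {
//			return err // context cancelled, or a single line exceeds the burst
//		}
//		publish(line)
//	}
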
// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

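// As a concrete example of what parsePortMap produces, a Docker port map
// entry such as (hypothetical values)
//
//	"8080/tcp": []nat.PortBinding{{HostIP: "0.0.0.0", HostPort: "30000"}}
//
// is converted into
//
//	&api.PortConfig{
//		PublishMode:   api.PublishModeHost,
//		Protocol:      api.ProtocolTCP,
//		TargetPort:    8080,
//		PublishedPort: 30000,
//	}
//
// The port's name cannot be recovered from the engine, as the TODO above
// points out.
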
type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return int(e.code)
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or ctx is done.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}

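// A rough sketch of how the swarmkit agent drives a controller over a task's
// lifetime (simplified; retries, status reporting, and error handling are
// owned by the swarmkit task state machine and omitted here):
//
//	ctrl, _ := newController(backend, task, deps)
//	_ = ctrl.Prepare(ctx) // pull image, create networks, volumes, container
//	_ = ctrl.Start(ctx)   // start and, if health-checked, wait until healthy
//	_ = ctrl.Wait(ctx)    // block until exit or unhealthy shutdown
//	_ = ctrl.Shutdown(ctx)
//	_ = ctrl.Remove(ctx)
//	_ = ctrl.Close()
//
// The names backend, task, and deps are placeholders for the executor's
// Backend, the *api.Task, and the exec.DependencyGetter passed in by the
// caller.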