github.com/fabiokung/docker@v0.11.2-0.20170222101415-4534dcd49497/daemon/cluster/executor/container/controller.go

package container

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, secrets exec.SecretGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, secrets)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}
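// exampleRunTask is an illustrative sketch (not part of the original file) of
// how an agent might drive this controller through a task lifecycle, assuming
// the exec.Controller contract used above: Prepare and Start report the
// sentinel errors exec.ErrTaskPrepared and exec.ErrTaskStarted when the work
// was already done, so a caller can treat those as success and move on to
// Wait. Error handling is simplified for brevity.
func exampleRunTask(ctx context.Context, ctlr exec.Controller) error {
	if err := ctlr.Prepare(ctx); err != nil && err != exec.ErrTaskPrepared {
		return err
	}
	if err := ctlr.Start(ctx); err != nil && err != exec.ErrTaskStarted {
		return err
	}
	// Wait blocks until the container exits or the context is cancelled.
	return ctlr.Wait(ctx)
}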
func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}
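// exampleForkOnce is a generic, hypothetical sketch (not part of the original
// file) of the fork-and-signal pattern Prepare uses for image pulls: the work
// runs once under a context detached from the caller, the result is recorded,
// and a channel is closed so that re-entrant callers all wait on the same
// outcome instead of restarting the work.
type exampleForkOnce struct {
	done   chan struct{}
	cancel func()
	err    error // only read after done is closed
}

func (f *exampleForkOnce) start(work func(context.Context) error) {
	if f.done != nil {
		return // already started; callers just wait on f.done
	}
	f.done = make(chan struct{})

	var wctx context.Context
	wctx, f.cancel = context.WithCancel(context.Background())

	go func() {
		defer close(f.done)
		f.err = work(wctx)
	}()
}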
// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation if we failed because some of the
				// networks were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	err := r.adapter.wait(ctx)
	if ctx.Err() != nil {
		return ctx.Err()
	}

	if err != nil {
		ee := &exitError{}
		if ec, ok := err.(exec.ExitCoder); ok {
			ee.code = ec.ExitCode()
		}
		select {
		case e := <-healthErr:
			ee.cause = e
		default:
			if err.Error() != "" {
				ee.cause = err
			}
		}
		return ee
	}

	return nil
}
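// exampleExitCode is a hypothetical helper (not part of the original file)
// showing how a caller can recover the container's exit code from the error
// returned by Wait: exitError implements exec.ExitCoder via its ExitCode
// method, so a type assertion is enough.
func exampleExitCode(err error) (int, bool) {
	if ec, ok := err.(exec.ExitCoder); ok {
		return ec.ExitCode(), true
	}
	return 0, false
}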
// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// remove container from service binding
	if err := r.adapter.deactivateServiceBinding(); err != nil {
		log.G(ctx).WithError(err).Errorf("failed to deactivate service binding for container %s", r.adapter.container.name())
		return err
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing networks referenced in this task in case this task is the
	// last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if err := r.waitReady(ctx); err != nil {
		return errors.Wrap(err, "container not ready for logs")
	}

	rc, err := r.adapter.logs(ctx, options)
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}
	defer rc.Close()

	var (
		// Use a rate limiter to keep things under control but also provide
		// some ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	brd := bufio.NewReader(rc)
	for {
		// The message header is 8 bytes: treat it as a uint64 and pull the
		// stream off the most significant byte.
		var header uint64
		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
			if err == io.EOF {
				return nil
			}

			return errors.Wrap(err, "failed reading log header")
		}

		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))

		// limit here to decrease allocation back pressure.
		if err := limiter.WaitN(ctx, int(size)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}

		buf := make([]byte, size)
		_, err := io.ReadFull(brd, buf)
		if err != nil {
			return errors.Wrap(err, "failed reading buffer")
		}

		// The timestamp is RFC3339Nano followed by a single space: lop it off,
		// parse it, and publish.
		parts := bytes.SplitN(buf, []byte(" "), 2)
		if len(parts) != 2 {
			return fmt.Errorf("invalid timestamp in log message: %v", buf)
		}

		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
		if err != nil {
			return errors.Wrap(err, "failed to parse timestamp")
		}

		tsp, err := gogotypes.TimestampProto(ts)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    api.LogStream(stream),

			Data: parts[1],
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}
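// exampleDecodeLogHeader is a hypothetical helper (not part of the original
// file) spelling out the header math used in Logs above. Each frame of the
// engine's multiplexed log stream is prefixed with 8 bytes: the most
// significant byte identifies the stream (stdout/stderr) and the remaining
// bytes carry the payload size, so reading the prefix as a big-endian uint64
// lets us split it exactly as Logs does.
func exampleDecodeLogHeader(header uint64) (stream uint8, size uint64) {
	stream = uint8((header >> (7 << 3)) & 0xFF) // most significant byte
	size = header & ^(uint64(0xFF) << (7 << 3)) // mask off the stream byte
	return stream, size
}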
// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}
func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return int(e.code)
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or the context
// is done.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}
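// exampleParseHostPorts is a hypothetical usage sketch (not part of the
// original file) of parsePortMap: a nat.PortMap entry such as "8080/tcp"
// bound to host port 30000 becomes a single api.PortConfig with
// PublishModeHost, TargetPort 8080 and PublishedPort 30000.
func exampleParseHostPorts() ([]*api.PortConfig, error) {
	pm := nat.PortMap{
		"8080/tcp": []nat.PortBinding{
			{HostIP: "0.0.0.0", HostPort: "30000"},
		},
	}
	return parsePortMap(pm)
}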