github.com/olljanat/moby@v1.13.1/daemon/cluster/executor/container/controller.go

package container

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/protobuf/ptypes"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, secrets exec.SecretGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, secrets)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare.
			// This ensures that Prepare can be idempotent and not incur the
			// extra cost of pulling when cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation again if we
				// failed because some of the networks
				// were not found.
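				// Recreating them here lets the enclosing loop retry the
				// start; any other start error below exits the loop.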
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	healthCmd := ctnr.Config.Healthcheck.Test

	if len(healthCmd) == 0 || healthCmd[0] == "NONE" {
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	err := r.adapter.wait(ctx)
	if ctx.Err() != nil {
		return ctx.Err()
	}

	if err != nil {
		ee := &exitError{}
		if ec, ok := err.(exec.ExitCoder); ok {
			ee.code = ec.ExitCode()
		}
		select {
		case e := <-healthErr:
			ee.cause = e
		default:
			if err.Error() != "" {
				ee.cause = err
			}
		}
		return ee
	}

	return nil
}

// Shutdown the container cleanly.
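// Shutdown cancels any in-flight image pull, removes the container from its
// service binding, and then stops it; unknown or already-stopped containers
// are not treated as errors.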
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// remove container from service binding
	if err := r.adapter.deactivateServiceBinding(); err != nil {
		log.G(ctx).WithError(err).Errorf("failed to deactivate service binding for container %s", r.adapter.container.name())
		return err
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing the networks referenced in this task in case this
	// task is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if err := r.waitReady(ctx); err != nil {
		return errors.Wrap(err, "container not ready for logs")
	}

	rc, err := r.adapter.logs(ctx, options)
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}
	defer rc.Close()

	var (
		// use a rate limiter to keep things under control but also provide
		// some ability to coalesce messages.
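		// Each frame's payload size is charged against this limiter via
		// WaitN below, before the payload buffer is allocated.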
		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	brd := bufio.NewReader(rc)
	for {
		// so, message header is 8 bytes, treat as uint64, pull stream off MSB
		var header uint64
		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
			if err == io.EOF {
				return nil
			}

			return errors.Wrap(err, "failed reading log header")
		}

		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))

		// limit here to decrease allocation back pressure.
		if err := limiter.WaitN(ctx, int(size)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}

		buf := make([]byte, size)
		_, err := io.ReadFull(brd, buf)
		if err != nil {
			return errors.Wrap(err, "failed reading buffer")
		}

		// Timestamp is RFC3339Nano with 1 space after. Lop, parse, publish
		parts := bytes.SplitN(buf, []byte(" "), 2)
		if len(parts) != 2 {
			return fmt.Errorf("invalid timestamp in log message: %v", buf)
		}

		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
		if err != nil {
			return errors.Wrap(err, "failed to parse timestamp")
		}

		tsp, err := ptypes.TimestampProto(ts)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    api.LogStream(stream),

			Data: parts[1],
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
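	// The container name is derived from the task and is unique to it (see
	// the controller type comment above), so matching on the name attribute
	// is sufficient.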
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return int(e.code)
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or the context exits.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}