github.com/yogeshlonkar/moby@v1.13.2-0.20201203103638-c0b64beaea94/daemon/cluster/executor/container/controller.go

package container

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/protobuf/ptypes"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, secrets exec.SecretGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, secrets)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}
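// Editor's note: the following sketch is illustrative and not part of the
// original file. It shows how an agent-side caller might drive a controller
// through the exec.Controller lifecycle; runTaskLifecycle is a hypothetical
// helper. The sentinel errors exec.ErrTaskPrepared and exec.ErrTaskStarted
// signal idempotent re-entry rather than failure, so they are swallowed here.
func runTaskLifecycle(ctx context.Context, ctlr *controller) error {
	defer ctlr.Close()

	if err := ctlr.Prepare(ctx); err != nil && err != exec.ErrTaskPrepared {
		return err
	}
	if err := ctlr.Start(ctx); err != nil && err != exec.ErrTaskStarted {
		return err
	}
	// Wait blocks until the container exits; the returned error may carry an
	// exit code through the exec.ExitCoder interface (see exitError below).
	return ctlr.Wait(ctx)
}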
func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare is
			// idempotent and does not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. If the pull fails, we only log the
				// error. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// The container has already been created: success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}
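// Editor's note: the following sketch is illustrative and not part of the
// original file. It isolates the "fork the pull, wait re-entrantly" pattern
// used by Prepare above: the work runs under a background context so a
// cancelled caller does not abort it, and the closed channel lets any number
// of later calls wait for (or immediately observe) the single result.
// onceWork, startOnce, and waitOnce are hypothetical names.
type onceWork struct {
	done   chan struct{} // closed when the work finishes
	cancel func()        // stops the work early, e.g. on shutdown
	err    error         // written once; read only after done is closed
}

func startOnce(work func(context.Context) error) *onceWork {
	w := &onceWork{done: make(chan struct{})}
	var wctx context.Context
	wctx, w.cancel = context.WithCancel(context.Background())
	go func() {
		defer close(w.done)
		w.err = work(wctx) // publication is ordered by the channel close
	}()
	return w
}

// waitOnce returns when the work completes or the caller's context is done;
// caller cancellation does not stop the underlying work.
func waitOnce(ctx context.Context, w *onceWork) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-w.done:
		return w.err
	}
}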
// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While we are reading the inspect
	// result, another caller could start the container, and we could end up
	// starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
				// Retry the start after recreating the networks if it
				// failed because some of the networks were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for the container to become healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong, but we want to
				// exit and report anyway.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// In this case, we stop the container and report the
				// unhealthy status.
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// Set the health check error and wait for the container to
				// fully exit (the "die" event).
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}
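// Editor's note: the following sketch is illustrative and not part of the
// original file. It isolates the retry shape used by Start above: the start
// is retried only for the single error class that a side action (recreating
// the networks) can cure, and every other error fails fast.
// startWithNetworkRetry is a hypothetical helper.
func startWithNetworkRetry(ctx context.Context, start, createNetworks func(context.Context) error) error {
	for {
		err := start(ctx)
		if err == nil {
			return nil
		}
		if _, ok := err.(libnetwork.ErrNoSuchNetwork); !ok {
			return errors.Wrap(err, "starting container failed")
		}
		// Recreate the missing networks, then retry the start.
		if err := createNetworks(ctx); err != nil {
			return err
		}
	}
}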
// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	err := r.adapter.wait(ctx)
	if ctx.Err() != nil {
		return ctx.Err()
	}

	if err != nil {
		ee := &exitError{}
		if ec, ok := err.(exec.ExitCoder); ok {
			ee.code = ec.ExitCode()
		}
		select {
		case e := <-healthErr:
			ee.cause = e
		default:
			if err.Error() != "" {
				ee.cause = err
			}
		}
		return ee
	}

	return nil
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// Remove the container from the service binding.
	if err := r.adapter.deactivateServiceBinding(); err != nil {
		log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
		// Don't return an error here, because failure to deactivate
		// the service binding is expected if the container was never
		// started.
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing the networks referenced by this task in case this task
	// is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}
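// Editor's note: the following sketch is illustrative and not part of the
// original file. It shows how a caller might unpack the error returned by
// Wait above: the exit code travels through the exec.ExitCoder interface,
// and a health-check failure that forced the shutdown surfaces as the cause
// (see exitError below). inspectWaitError is a hypothetical helper.
func inspectWaitError(err error) (code int, unhealthy bool) {
	if err == nil {
		return 0, false
	}
	if ec, ok := err.(exec.ExitCoder); ok {
		code = ec.ExitCode()
	}
	// errors.Cause (from github.com/pkg/errors) walks Cause() chains, so it
	// reaches the ErrContainerUnhealthy stored in exitError.cause.
	unhealthy = errors.Cause(err) == ErrContainerUnhealthy
	return code, unhealthy
}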
// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if err := r.waitReady(ctx); err != nil {
		return errors.Wrap(err, "container not ready for logs")
	}

	rc, err := r.adapter.logs(ctx, options)
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}
	defer rc.Close()

	var (
		// Use a rate limiter to keep things under control, but also to
		// provide some ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Limit(10<<20), 10<<20) // 10 MB/s, with a 10 MB burst
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	brd := bufio.NewReader(rc)
	for {
		// The message header is 8 bytes: treat it as a uint64 and pull the
		// stream off the most significant byte.
		var header uint64
		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
			if err == io.EOF {
				return nil
			}

			return errors.Wrap(err, "failed reading log header")
		}

		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))

		// Limit here to decrease allocation back pressure.
		if err := limiter.WaitN(ctx, int(size)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}

		buf := make([]byte, size)
		_, err := io.ReadFull(brd, buf)
		if err != nil {
			return errors.Wrap(err, "failed reading buffer")
		}

		// The timestamp is RFC3339Nano followed by a single space: lop it
		// off, parse it, and publish.
		parts := bytes.SplitN(buf, []byte(" "), 2)
		if len(parts) != 2 {
			return fmt.Errorf("invalid timestamp in log message: %v", buf)
		}

		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
		if err != nil {
			return errors.Wrap(err, "failed to parse timestamp")
		}

		tsp, err := ptypes.TimestampProto(ts)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    api.LogStream(stream),

			Data: parts[1],
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}
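// Editor's note: the following sketch is illustrative and not part of the
// original file. It unpacks the bit arithmetic in the Logs loop above,
// assuming the daemon's stdcopy framing: byte 0 of the 8-byte header is the
// stream (0=stdin, 1=stdout, 2=stderr), bytes 1-3 are zero padding, and
// bytes 4-7 are the big-endian payload size, so masking off the top byte of
// the uint64 yields the size directly. decodeLogHeader is a hypothetical
// helper.
func decodeLogHeader(header uint64) (stream uint8, size uint64) {
	stream = uint8(header >> (7 << 3))          // shift the MSB down 56 bits
	size = header &^ (uint64(0xFF) << (7 << 3)) // clear the stream byte
	return stream, size
}

// For example, a header of bytes {0x01, 0, 0, 0, 0, 0, 0x01, 0x2C} decodes
// to stream 1 (stdout) and size 300 (0x012C).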
// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}
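// Editor's note: the following sketch is illustrative and not part of the
// original file. It shows what parsePortMap above produces for a container
// that publishes container port 80/tcp on host port 8080; the nat.PortMap is
// built by hand here, whereas the real input comes from
// ContainerJSON.NetworkSettings.Ports. examplePortMap is a hypothetical
// helper.
func examplePortMap() ([]*api.PortConfig, error) {
	portMap := nat.PortMap{
		"80/tcp": []nat.PortBinding{{HostIP: "0.0.0.0", HostPort: "8080"}},
	}
	// Yields a single PortConfig with PublishMode host, Protocol tcp,
	// TargetPort 80, and PublishedPort 8080.
	return parsePortMap(portMap)
}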
type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return int(e.code)
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or the context
// is cancelled.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}
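// Editor's note: the following sketch is illustrative and not part of the
// original file. It demonstrates the messages exitError produces; the exit
// code 137 is just an example value, and exampleExitErrors is a hypothetical
// helper.
func exampleExitErrors() {
	fmt.Println((&exitError{code: 137}).Error())
	// task: non-zero exit (137)

	fmt.Println((&exitError{code: 137, cause: ErrContainerUnhealthy}).Error())
	// task: non-zero exit (137): <the ErrContainerUnhealthy message>
}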