github.com/adityamillind98/moby@v23.0.0-rc.4+incompatible/libcontainerd/remote/client.go (about) 1 package remote // import "github.com/docker/docker/libcontainerd/remote" 2 3 import ( 4 "context" 5 "encoding/json" 6 "io" 7 "os" 8 "path/filepath" 9 "reflect" 10 "runtime" 11 "strings" 12 "sync" 13 "syscall" 14 "time" 15 16 "github.com/containerd/containerd" 17 apievents "github.com/containerd/containerd/api/events" 18 "github.com/containerd/containerd/api/types" 19 "github.com/containerd/containerd/archive" 20 "github.com/containerd/containerd/cio" 21 "github.com/containerd/containerd/content" 22 containerderrors "github.com/containerd/containerd/errdefs" 23 "github.com/containerd/containerd/events" 24 "github.com/containerd/containerd/images" 25 v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" 26 "github.com/containerd/typeurl" 27 "github.com/docker/docker/errdefs" 28 "github.com/docker/docker/libcontainerd/queue" 29 libcontainerdtypes "github.com/docker/docker/libcontainerd/types" 30 "github.com/docker/docker/pkg/ioutils" 31 v1 "github.com/opencontainers/image-spec/specs-go/v1" 32 specs "github.com/opencontainers/runtime-spec/specs-go" 33 "github.com/pkg/errors" 34 "github.com/sirupsen/logrus" 35 "google.golang.org/grpc/codes" 36 "google.golang.org/grpc/status" 37 ) 38 39 // DockerContainerBundlePath is the label key pointing to the container's bundle path 40 const DockerContainerBundlePath = "com.docker/engine.bundle.path" 41 42 type client struct { 43 client *containerd.Client 44 stateDir string 45 logger *logrus.Entry 46 ns string 47 48 backend libcontainerdtypes.Backend 49 eventQ queue.Queue 50 oomMu sync.Mutex 51 oom map[string]bool 52 v2runcoptionsMu sync.Mutex 53 // v2runcoptions is used for copying options specified on Create() to Start() 54 v2runcoptions map[string]v2runcoptions.Options 55 } 56 57 // NewClient creates a new libcontainerd client from a containerd client 58 func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) { 59 c := &client{ 60 client: cli, 61 stateDir: stateDir, 62 logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns), 63 ns: ns, 64 backend: b, 65 oom: make(map[string]bool), 66 v2runcoptions: make(map[string]v2runcoptions.Options), 67 } 68 69 go c.processEventStream(ctx, ns) 70 71 return c, nil 72 } 73 74 func (c *client) Version(ctx context.Context) (containerd.Version, error) { 75 return c.client.Version(ctx) 76 } 77 78 // Restore loads the containerd container. 79 // It should not be called concurrently with any other operation for the given ID. 80 func (c *client) Restore(ctx context.Context, id string, attachStdio libcontainerdtypes.StdioCallback) (alive bool, pid int, p libcontainerdtypes.Process, err error) { 81 var dio *cio.DirectIO 82 defer func() { 83 if err != nil && dio != nil { 84 dio.Cancel() 85 dio.Close() 86 } 87 err = wrapError(err) 88 }() 89 90 ctr, err := c.client.LoadContainer(ctx, id) 91 if err != nil { 92 return false, -1, nil, errors.WithStack(wrapError(err)) 93 } 94 95 attachIO := func(fifos *cio.FIFOSet) (cio.IO, error) { 96 // dio must be assigned to the previously defined dio for the defer above 97 // to handle cleanup 98 dio, err = c.newDirectIO(ctx, fifos) 99 if err != nil { 100 return nil, err 101 } 102 return attachStdio(dio) 103 } 104 t, err := ctr.Task(ctx, attachIO) 105 if err != nil && !containerderrors.IsNotFound(err) { 106 return false, -1, nil, errors.Wrap(wrapError(err), "error getting containerd task for container") 107 } 108 109 if t != nil { 110 s, err := t.Status(ctx) 111 if err != nil { 112 return false, -1, nil, errors.Wrap(wrapError(err), "error getting task status") 113 } 114 alive = s.Status != containerd.Stopped 115 pid = int(t.Pid()) 116 } 117 118 c.logger.WithFields(logrus.Fields{ 119 "container": id, 120 "alive": alive, 121 "pid": pid, 122 }).Debug("restored container") 123 124 return alive, pid, &restoredProcess{ 125 p: t, 126 }, nil 127 } 128 129 func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) error { 130 bdir := c.bundleDir(id) 131 c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created") 132 133 newOpts := []containerd.NewContainerOpts{ 134 containerd.WithSpec(ociSpec), 135 containerd.WithRuntime(shim, runtimeOptions), 136 WithBundle(bdir, ociSpec), 137 } 138 opts = append(opts, newOpts...) 139 140 _, err := c.client.NewContainer(ctx, id, opts...) 141 if err != nil { 142 if containerderrors.IsAlreadyExists(err) { 143 return errors.WithStack(errdefs.Conflict(errors.New("id already in use"))) 144 } 145 return wrapError(err) 146 } 147 if x, ok := runtimeOptions.(*v2runcoptions.Options); ok { 148 c.v2runcoptionsMu.Lock() 149 c.v2runcoptions[id] = *x 150 c.v2runcoptionsMu.Unlock() 151 } 152 return nil 153 } 154 155 // Start create and start a task for the specified containerd id 156 func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) { 157 ctr, err := c.getContainer(ctx, id) 158 if err != nil { 159 return -1, err 160 } 161 var ( 162 cp *types.Descriptor 163 t containerd.Task 164 rio cio.IO 165 stdinCloseSync = make(chan struct{}) 166 ) 167 168 if checkpointDir != "" { 169 // write checkpoint to the content store 170 tar := archive.Diff(ctx, "", checkpointDir) 171 cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar) 172 // remove the checkpoint when we're done 173 defer func() { 174 if cp != nil { 175 err := c.client.ContentStore().Delete(context.Background(), cp.Digest) 176 if err != nil { 177 c.logger.WithError(err).WithFields(logrus.Fields{ 178 "ref": checkpointDir, 179 "digest": cp.Digest, 180 }).Warnf("failed to delete temporary checkpoint entry") 181 } 182 } 183 }() 184 if err := tar.Close(); err != nil { 185 return -1, errors.Wrap(err, "failed to close checkpoint tar stream") 186 } 187 if err != nil { 188 return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd") 189 } 190 } 191 192 spec, err := ctr.Spec(ctx) 193 if err != nil { 194 return -1, errors.Wrap(err, "failed to retrieve spec") 195 } 196 labels, err := ctr.Labels(ctx) 197 if err != nil { 198 return -1, errors.Wrap(err, "failed to retrieve labels") 199 } 200 bundle := labels[DockerContainerBundlePath] 201 uid, gid := getSpecUser(spec) 202 203 taskOpts := []containerd.NewTaskOpts{ 204 func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error { 205 info.Checkpoint = cp 206 return nil 207 }, 208 } 209 210 if runtime.GOOS != "windows" { 211 taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error { 212 c.v2runcoptionsMu.Lock() 213 opts, ok := c.v2runcoptions[id] 214 c.v2runcoptionsMu.Unlock() 215 if ok { 216 opts.IoUid = uint32(uid) 217 opts.IoGid = uint32(gid) 218 info.Options = &opts 219 } 220 return nil 221 }) 222 } else { 223 taskOpts = append(taskOpts, withLogLevel(c.logger.Level)) 224 } 225 226 t, err = ctr.NewTask(ctx, 227 func(id string) (cio.IO, error) { 228 fifos := newFIFOSet(bundle, libcontainerdtypes.InitProcessName, withStdin, spec.Process.Terminal) 229 230 rio, err = c.createIO(fifos, id, libcontainerdtypes.InitProcessName, stdinCloseSync, attachStdio) 231 return rio, err 232 }, 233 taskOpts..., 234 ) 235 if err != nil { 236 close(stdinCloseSync) 237 if rio != nil { 238 rio.Cancel() 239 rio.Close() 240 } 241 return -1, wrapError(err) 242 } 243 244 // Signal c.createIO that it can call CloseIO 245 close(stdinCloseSync) 246 247 if err := t.Start(ctx); err != nil { 248 // Only Stopped tasks can be deleted. Created tasks have to be 249 // killed first, to transition them to Stopped. 250 if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil { 251 c.logger.WithError(err).WithField("container", id). 252 Error("failed to delete task after fail start") 253 } 254 return -1, wrapError(err) 255 } 256 257 return int(t.Pid()), nil 258 } 259 260 // Exec creates exec process. 261 // 262 // The containerd client calls Exec to register the exec config in the shim side. 263 // When the client calls Start, the shim will create stdin fifo if needs. But 264 // for the container main process, the stdin fifo will be created in Create not 265 // the Start call. stdinCloseSync channel should be closed after Start exec 266 // process. 267 func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) { 268 ctr, err := c.getContainer(ctx, containerID) 269 if err != nil { 270 return -1, err 271 } 272 t, err := ctr.Task(ctx, nil) 273 if err != nil { 274 if containerderrors.IsNotFound(err) { 275 return -1, errors.WithStack(errdefs.InvalidParameter(errors.New("container is not running"))) 276 } 277 return -1, wrapError(err) 278 } 279 280 var ( 281 p containerd.Process 282 rio cio.IO 283 stdinCloseSync = make(chan struct{}) 284 ) 285 286 labels, err := ctr.Labels(ctx) 287 if err != nil { 288 return -1, wrapError(err) 289 } 290 291 fifos := newFIFOSet(labels[DockerContainerBundlePath], processID, withStdin, spec.Terminal) 292 293 defer func() { 294 if err != nil { 295 if rio != nil { 296 rio.Cancel() 297 rio.Close() 298 } 299 } 300 }() 301 302 p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) { 303 rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio) 304 return rio, err 305 }) 306 if err != nil { 307 close(stdinCloseSync) 308 if containerderrors.IsAlreadyExists(err) { 309 return -1, errors.WithStack(errdefs.Conflict(errors.New("id already in use"))) 310 } 311 return -1, wrapError(err) 312 } 313 314 // Signal c.createIO that it can call CloseIO 315 // 316 // the stdin of exec process will be created after p.Start in containerd 317 defer close(stdinCloseSync) 318 319 if err = p.Start(ctx); err != nil { 320 // use new context for cleanup because old one may be cancelled by user, but leave a timeout to make sure 321 // we are not waiting forever if containerd is unresponsive or to work around fifo cancelling issues in 322 // older containerd-shim 323 ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second) 324 defer cancel() 325 p.Delete(ctx) 326 return -1, wrapError(err) 327 } 328 return int(p.Pid()), nil 329 } 330 331 func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal syscall.Signal) error { 332 p, err := c.getProcess(ctx, containerID, processID) 333 if err != nil { 334 return err 335 } 336 return wrapError(p.Kill(ctx, signal)) 337 } 338 339 func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error { 340 p, err := c.getProcess(ctx, containerID, processID) 341 if err != nil { 342 return err 343 } 344 345 return p.Resize(ctx, uint32(width), uint32(height)) 346 } 347 348 func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error { 349 p, err := c.getProcess(ctx, containerID, processID) 350 if err != nil { 351 return err 352 } 353 354 return p.CloseIO(ctx, containerd.WithStdinCloser) 355 } 356 357 func (c *client) Pause(ctx context.Context, containerID string) error { 358 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 359 if err != nil { 360 return err 361 } 362 363 return wrapError(p.(containerd.Task).Pause(ctx)) 364 } 365 366 func (c *client) Resume(ctx context.Context, containerID string) error { 367 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 368 if err != nil { 369 return err 370 } 371 372 return p.(containerd.Task).Resume(ctx) 373 } 374 375 func (c *client) Stats(ctx context.Context, containerID string) (*libcontainerdtypes.Stats, error) { 376 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 377 if err != nil { 378 return nil, err 379 } 380 381 m, err := p.(containerd.Task).Metrics(ctx) 382 if err != nil { 383 return nil, err 384 } 385 386 v, err := typeurl.UnmarshalAny(m.Data) 387 if err != nil { 388 return nil, err 389 } 390 return libcontainerdtypes.InterfaceToStats(m.Timestamp, v), nil 391 } 392 393 func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) { 394 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 395 if err != nil { 396 return nil, err 397 } 398 399 pis, err := p.(containerd.Task).Pids(ctx) 400 if err != nil { 401 return nil, err 402 } 403 404 var pids []uint32 405 for _, i := range pis { 406 pids = append(pids, i.Pid) 407 } 408 409 return pids, nil 410 } 411 412 func (c *client) Summary(ctx context.Context, containerID string) ([]libcontainerdtypes.Summary, error) { 413 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 414 if err != nil { 415 return nil, err 416 } 417 418 pis, err := p.(containerd.Task).Pids(ctx) 419 if err != nil { 420 return nil, err 421 } 422 423 var infos []libcontainerdtypes.Summary 424 for _, pi := range pis { 425 i, err := typeurl.UnmarshalAny(pi.Info) 426 if err != nil { 427 return nil, errors.Wrap(err, "unable to decode process details") 428 } 429 s, err := summaryFromInterface(i) 430 if err != nil { 431 return nil, err 432 } 433 infos = append(infos, *s) 434 } 435 436 return infos, nil 437 } 438 439 type restoredProcess struct { 440 p containerd.Process 441 } 442 443 func (p *restoredProcess) Delete(ctx context.Context) (uint32, time.Time, error) { 444 if p.p == nil { 445 return 255, time.Now(), nil 446 } 447 status, err := p.p.Delete(ctx) 448 if err != nil { 449 return 255, time.Now(), nil 450 } 451 return status.ExitCode(), status.ExitTime(), nil 452 } 453 454 func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) { 455 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 456 if err != nil { 457 return 255, time.Now(), nil 458 } 459 460 status, err := p.Delete(ctx) 461 if err != nil { 462 return 255, time.Now(), nil 463 } 464 return status.ExitCode(), status.ExitTime(), nil 465 } 466 467 func (c *client) Delete(ctx context.Context, containerID string) error { 468 ctr, err := c.getContainer(ctx, containerID) 469 if err != nil { 470 return err 471 } 472 labels, err := ctr.Labels(ctx) 473 if err != nil { 474 return err 475 } 476 bundle := labels[DockerContainerBundlePath] 477 if err := ctr.Delete(ctx); err != nil { 478 return wrapError(err) 479 } 480 c.oomMu.Lock() 481 delete(c.oom, containerID) 482 c.oomMu.Unlock() 483 c.v2runcoptionsMu.Lock() 484 delete(c.v2runcoptions, containerID) 485 c.v2runcoptionsMu.Unlock() 486 if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" { 487 if err := os.RemoveAll(bundle); err != nil { 488 c.logger.WithError(err).WithFields(logrus.Fields{ 489 "container": containerID, 490 "bundle": bundle, 491 }).Error("failed to remove state dir") 492 } 493 } 494 return nil 495 } 496 497 func (c *client) Status(ctx context.Context, containerID string) (containerd.ProcessStatus, error) { 498 t, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 499 if err != nil { 500 return containerd.Unknown, err 501 } 502 s, err := t.Status(ctx) 503 if err != nil { 504 return containerd.Unknown, wrapError(err) 505 } 506 return s.Status, nil 507 } 508 509 func (c *client) getCheckpointOptions(id string, exit bool) containerd.CheckpointTaskOpts { 510 return func(r *containerd.CheckpointTaskInfo) error { 511 if r.Options == nil { 512 c.v2runcoptionsMu.Lock() 513 _, ok := c.v2runcoptions[id] 514 c.v2runcoptionsMu.Unlock() 515 if ok { 516 r.Options = &v2runcoptions.CheckpointOptions{Exit: exit} 517 } 518 return nil 519 } 520 521 switch opts := r.Options.(type) { 522 case *v2runcoptions.CheckpointOptions: 523 opts.Exit = exit 524 } 525 526 return nil 527 } 528 } 529 530 func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error { 531 p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName) 532 if err != nil { 533 return err 534 } 535 536 opts := []containerd.CheckpointTaskOpts{c.getCheckpointOptions(containerID, exit)} 537 img, err := p.(containerd.Task).Checkpoint(ctx, opts...) 538 if err != nil { 539 return wrapError(err) 540 } 541 // Whatever happens, delete the checkpoint from containerd 542 defer func() { 543 err := c.client.ImageService().Delete(context.Background(), img.Name()) 544 if err != nil { 545 c.logger.WithError(err).WithField("digest", img.Target().Digest). 546 Warnf("failed to delete checkpoint image") 547 } 548 }() 549 550 b, err := content.ReadBlob(ctx, c.client.ContentStore(), img.Target()) 551 if err != nil { 552 return errdefs.System(errors.Wrapf(err, "failed to retrieve checkpoint data")) 553 } 554 var index v1.Index 555 if err := json.Unmarshal(b, &index); err != nil { 556 return errdefs.System(errors.Wrapf(err, "failed to decode checkpoint data")) 557 } 558 559 var cpDesc *v1.Descriptor 560 for _, m := range index.Manifests { 561 m := m 562 if m.MediaType == images.MediaTypeContainerd1Checkpoint { 563 cpDesc = &m //nolint:gosec 564 break 565 } 566 } 567 if cpDesc == nil { 568 return errdefs.System(errors.Wrapf(err, "invalid checkpoint")) 569 } 570 571 rat, err := c.client.ContentStore().ReaderAt(ctx, *cpDesc) 572 if err != nil { 573 return errdefs.System(errors.Wrapf(err, "failed to get checkpoint reader")) 574 } 575 defer rat.Close() 576 _, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat)) 577 if err != nil { 578 return errdefs.System(errors.Wrapf(err, "failed to read checkpoint reader")) 579 } 580 581 return err 582 } 583 584 func (c *client) getContainer(ctx context.Context, id string) (containerd.Container, error) { 585 ctr, err := c.client.LoadContainer(ctx, id) 586 if err != nil { 587 if containerderrors.IsNotFound(err) { 588 return nil, errors.WithStack(errdefs.NotFound(errors.New("no such container"))) 589 } 590 return nil, wrapError(err) 591 } 592 return ctr, nil 593 } 594 595 func (c *client) getProcess(ctx context.Context, containerID, processID string) (containerd.Process, error) { 596 ctr, err := c.getContainer(ctx, containerID) 597 if err != nil { 598 return nil, err 599 } 600 t, err := ctr.Task(ctx, nil) 601 if err != nil { 602 if containerderrors.IsNotFound(err) { 603 return nil, errors.WithStack(errdefs.NotFound(errors.New("container is not running"))) 604 } 605 return nil, wrapError(err) 606 } 607 if processID == libcontainerdtypes.InitProcessName { 608 return t, nil 609 } 610 p, err := t.LoadProcess(ctx, processID, nil) 611 if err != nil { 612 if containerderrors.IsNotFound(err) { 613 return nil, errors.WithStack(errdefs.NotFound(errors.New("no such exec"))) 614 } 615 return nil, wrapError(err) 616 } 617 return p, nil 618 } 619 620 // createIO creates the io to be used by a process 621 // This needs to get a pointer to interface as upon closure the process may not have yet been registered 622 func (c *client) createIO(fifos *cio.FIFOSet, containerID, processID string, stdinCloseSync chan struct{}, attachStdio libcontainerdtypes.StdioCallback) (cio.IO, error) { 623 var ( 624 io *cio.DirectIO 625 err error 626 ) 627 io, err = c.newDirectIO(context.Background(), fifos) 628 if err != nil { 629 return nil, err 630 } 631 632 if io.Stdin != nil { 633 var ( 634 err error 635 stdinOnce sync.Once 636 ) 637 pipe := io.Stdin 638 io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error { 639 stdinOnce.Do(func() { 640 err = pipe.Close() 641 // Do the rest in a new routine to avoid a deadlock if the 642 // Exec/Start call failed. 643 go func() { 644 <-stdinCloseSync 645 p, err := c.getProcess(context.Background(), containerID, processID) 646 if err == nil { 647 err = p.CloseIO(context.Background(), containerd.WithStdinCloser) 648 if err != nil && strings.Contains(err.Error(), "transport is closing") { 649 err = nil 650 } 651 } 652 }() 653 }) 654 return err 655 }) 656 } 657 658 rio, err := attachStdio(io) 659 if err != nil { 660 io.Cancel() 661 io.Close() 662 } 663 return rio, err 664 } 665 666 func (c *client) processEvent(ctx context.Context, et libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) { 667 c.eventQ.Append(ei.ContainerID, func() { 668 err := c.backend.ProcessEvent(ei.ContainerID, et, ei) 669 if err != nil { 670 c.logger.WithError(err).WithFields(logrus.Fields{ 671 "container": ei.ContainerID, 672 "event": et, 673 "event-info": ei, 674 }).Error("failed to process event") 675 } 676 677 if et == libcontainerdtypes.EventExit && ei.ProcessID != ei.ContainerID { 678 p, err := c.getProcess(ctx, ei.ContainerID, ei.ProcessID) 679 if err != nil { 680 681 c.logger.WithError(errors.New("no such process")). 682 WithFields(logrus.Fields{ 683 "error": err, 684 "container": ei.ContainerID, 685 "process": ei.ProcessID, 686 }).Error("exit event") 687 return 688 } 689 690 ctr, err := c.getContainer(ctx, ei.ContainerID) 691 if err != nil { 692 c.logger.WithFields(logrus.Fields{ 693 "container": ei.ContainerID, 694 "error": err, 695 }).Error("failed to find container") 696 } else { 697 labels, err := ctr.Labels(ctx) 698 if err != nil { 699 c.logger.WithFields(logrus.Fields{ 700 "container": ei.ContainerID, 701 "error": err, 702 }).Error("failed to get container labels") 703 return 704 } 705 newFIFOSet(labels[DockerContainerBundlePath], ei.ProcessID, true, false).Close() 706 } 707 _, err = p.Delete(context.Background()) 708 if err != nil { 709 c.logger.WithError(err).WithFields(logrus.Fields{ 710 "container": ei.ContainerID, 711 "process": ei.ProcessID, 712 }).Warn("failed to delete process") 713 } 714 } 715 }) 716 } 717 718 func (c *client) waitServe(ctx context.Context) bool { 719 t := 100 * time.Millisecond 720 delay := time.NewTimer(t) 721 if !delay.Stop() { 722 <-delay.C 723 } 724 defer delay.Stop() 725 726 // `IsServing` will actually block until the service is ready. 727 // However it can return early, so we'll loop with a delay to handle it. 728 for { 729 serving, err := c.client.IsServing(ctx) 730 if err != nil { 731 if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { 732 return false 733 } 734 logrus.WithError(err).Warn("Error while testing if containerd API is ready") 735 } 736 737 if serving { 738 return true 739 } 740 741 delay.Reset(t) 742 select { 743 case <-ctx.Done(): 744 return false 745 case <-delay.C: 746 } 747 } 748 } 749 750 func (c *client) processEventStream(ctx context.Context, ns string) { 751 var ( 752 err error 753 ev *events.Envelope 754 et libcontainerdtypes.EventType 755 ei libcontainerdtypes.EventInfo 756 ) 757 758 // Create a new context specifically for this subscription. 759 // The context must be cancelled to cancel the subscription. 760 // In cases where we have to restart event stream processing, 761 // we'll need the original context b/c this one will be cancelled 762 subCtx, cancel := context.WithCancel(ctx) 763 defer cancel() 764 765 // Filter on both namespace *and* topic. To create an "and" filter, 766 // this must be a single, comma-separated string 767 eventStream, errC := c.client.EventService().Subscribe(subCtx, "namespace=="+ns+",topic~=|^/tasks/|") 768 769 c.logger.Debug("processing event stream") 770 771 for { 772 var oomKilled bool 773 select { 774 case err = <-errC: 775 if err != nil { 776 errStatus, ok := status.FromError(err) 777 if !ok || errStatus.Code() != codes.Canceled { 778 c.logger.WithError(err).Error("Failed to get event") 779 c.logger.Info("Waiting for containerd to be ready to restart event processing") 780 if c.waitServe(ctx) { 781 go c.processEventStream(ctx, ns) 782 return 783 } 784 } 785 c.logger.WithError(ctx.Err()).Info("stopping event stream following graceful shutdown") 786 } 787 return 788 case ev = <-eventStream: 789 if ev.Event == nil { 790 c.logger.WithField("event", ev).Warn("invalid event") 791 continue 792 } 793 794 v, err := typeurl.UnmarshalAny(ev.Event) 795 if err != nil { 796 c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event") 797 continue 798 } 799 800 c.logger.WithField("topic", ev.Topic).Debug("event") 801 802 switch t := v.(type) { 803 case *apievents.TaskCreate: 804 et = libcontainerdtypes.EventCreate 805 ei = libcontainerdtypes.EventInfo{ 806 ContainerID: t.ContainerID, 807 ProcessID: t.ContainerID, 808 Pid: t.Pid, 809 } 810 case *apievents.TaskStart: 811 et = libcontainerdtypes.EventStart 812 ei = libcontainerdtypes.EventInfo{ 813 ContainerID: t.ContainerID, 814 ProcessID: t.ContainerID, 815 Pid: t.Pid, 816 } 817 case *apievents.TaskExit: 818 et = libcontainerdtypes.EventExit 819 ei = libcontainerdtypes.EventInfo{ 820 ContainerID: t.ContainerID, 821 ProcessID: t.ID, 822 Pid: t.Pid, 823 ExitCode: t.ExitStatus, 824 ExitedAt: t.ExitedAt, 825 } 826 case *apievents.TaskOOM: 827 et = libcontainerdtypes.EventOOM 828 ei = libcontainerdtypes.EventInfo{ 829 ContainerID: t.ContainerID, 830 OOMKilled: true, 831 } 832 oomKilled = true 833 case *apievents.TaskExecAdded: 834 et = libcontainerdtypes.EventExecAdded 835 ei = libcontainerdtypes.EventInfo{ 836 ContainerID: t.ContainerID, 837 ProcessID: t.ExecID, 838 } 839 case *apievents.TaskExecStarted: 840 et = libcontainerdtypes.EventExecStarted 841 ei = libcontainerdtypes.EventInfo{ 842 ContainerID: t.ContainerID, 843 ProcessID: t.ExecID, 844 Pid: t.Pid, 845 } 846 case *apievents.TaskPaused: 847 et = libcontainerdtypes.EventPaused 848 ei = libcontainerdtypes.EventInfo{ 849 ContainerID: t.ContainerID, 850 } 851 case *apievents.TaskResumed: 852 et = libcontainerdtypes.EventResumed 853 ei = libcontainerdtypes.EventInfo{ 854 ContainerID: t.ContainerID, 855 } 856 case *apievents.TaskDelete: 857 c.logger.WithFields(logrus.Fields{ 858 "topic": ev.Topic, 859 "type": reflect.TypeOf(t), 860 "container": t.ContainerID}, 861 ).Info("ignoring event") 862 continue 863 default: 864 c.logger.WithFields(logrus.Fields{ 865 "topic": ev.Topic, 866 "type": reflect.TypeOf(t)}, 867 ).Info("ignoring event") 868 continue 869 } 870 871 c.oomMu.Lock() 872 if oomKilled { 873 c.oom[ei.ContainerID] = true 874 } 875 ei.OOMKilled = c.oom[ei.ContainerID] 876 c.oomMu.Unlock() 877 878 c.processEvent(ctx, et, ei) 879 } 880 } 881 } 882 883 func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) { 884 writer, err := c.client.ContentStore().Writer(ctx, content.WithRef(ref)) 885 if err != nil { 886 return nil, err 887 } 888 defer writer.Close() 889 size, err := io.Copy(writer, r) 890 if err != nil { 891 return nil, err 892 } 893 labels := map[string]string{ 894 "containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339), 895 } 896 if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil { 897 return nil, err 898 } 899 return &types.Descriptor{ 900 MediaType: mediaType, 901 Digest: writer.Digest(), 902 Size_: size, 903 }, nil 904 } 905 906 func (c *client) bundleDir(id string) string { 907 return filepath.Join(c.stateDir, id) 908 } 909 910 func wrapError(err error) error { 911 switch { 912 case err == nil: 913 return nil 914 case containerderrors.IsNotFound(err): 915 return errdefs.NotFound(err) 916 } 917 918 msg := err.Error() 919 for _, s := range []string{"container does not exist", "not found", "no such container"} { 920 if strings.Contains(msg, s) { 921 return errdefs.NotFound(err) 922 } 923 } 924 return err 925 }