github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/libcontainerd/remote/client.go

package remote // import "github.com/docker/docker/libcontainerd/remote"

import (
	"context"
	"encoding/json"
	"io"
	"os"
	"path/filepath"
	"reflect"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/containerd/containerd"
	apievents "github.com/containerd/containerd/api/events"
	"github.com/containerd/containerd/api/types"
	"github.com/containerd/containerd/archive"
	"github.com/containerd/containerd/cio"
	"github.com/containerd/containerd/content"
	containerderrors "github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/events"
	"github.com/containerd/containerd/images"
	"github.com/containerd/containerd/runtime/linux/runctypes"
	v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
	"github.com/containerd/typeurl"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/libcontainerd/queue"
	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
	"github.com/docker/docker/pkg/ioutils"
	v1 "github.com/opencontainers/image-spec/specs-go/v1"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// DockerContainerBundlePath is the label key pointing to the container's bundle path
const DockerContainerBundlePath = "com.docker/engine.bundle.path"

type client struct {
	client   *containerd.Client
	stateDir string
	logger   *logrus.Entry
	ns       string

	backend         libcontainerdtypes.Backend
	eventQ          queue.Queue
	oomMu           sync.Mutex
	oom             map[string]bool
	v2runcoptionsMu sync.Mutex
	// v2runcoptions is used for copying options specified on Create() to Start()
	v2runcoptions map[string]v2runcoptions.Options
}

// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
	c := &client{
		client:        cli,
		stateDir:      stateDir,
		logger:        logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
		ns:            ns,
		backend:       b,
		oom:           make(map[string]bool),
		v2runcoptions: make(map[string]v2runcoptions.Options),
	}

	go c.processEventStream(ctx, ns)

	return c, nil
}
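// A minimal wiring sketch (hypothetical caller code; the socket address,
// state directory, namespace, and backend value below are illustrative
// assumptions, not values mandated by this package):
//
//	cli, err := containerd.New("/run/containerd/containerd.sock")
//	if err != nil {
//		// handle error
//	}
//	client, err := NewClient(context.Background(), cli, "/var/run/docker/libcontainerd", "moby", backend)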
// Version returns the version of the connected containerd daemon.
func (c *client) Version(ctx context.Context) (containerd.Version, error) {
	return c.client.Version(ctx)
}

// Restore loads the containerd container.
// It should not be called concurrently with any other operation for the given ID.
func (c *client) Restore(ctx context.Context, id string, attachStdio libcontainerdtypes.StdioCallback) (alive bool, pid int, p libcontainerdtypes.Process, err error) {
	var dio *cio.DirectIO
	defer func() {
		if err != nil && dio != nil {
			dio.Cancel()
			dio.Close()
		}
		err = wrapError(err)
	}()

	ctr, err := c.client.LoadContainer(ctx, id)
	if err != nil {
		return false, -1, nil, errors.WithStack(wrapError(err))
	}

	attachIO := func(fifos *cio.FIFOSet) (cio.IO, error) {
		// dio must be assigned to the previously defined dio for the defer above
		// to handle cleanup
		dio, err = c.newDirectIO(ctx, fifos)
		if err != nil {
			return nil, err
		}
		return attachStdio(dio)
	}
	t, err := ctr.Task(ctx, attachIO)
	if err != nil && !containerderrors.IsNotFound(err) {
		return false, -1, nil, errors.Wrap(wrapError(err), "error getting containerd task for container")
	}

	if t != nil {
		s, err := t.Status(ctx)
		if err != nil {
			return false, -1, nil, errors.Wrap(wrapError(err), "error getting task status")
		}
		alive = s.Status != containerd.Stopped
		pid = int(t.Pid())
	}

	c.logger.WithFields(logrus.Fields{
		"container": id,
		"alive":     alive,
		"pid":       pid,
	}).Debug("restored container")

	return alive, pid, &restoredProcess{
		p: t,
	}, nil
}
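// Create creates a new containerd container for the given OCI spec. v2 runc
// options passed in runtimeOptions are retained so that Start can hand them
// back to the runtime when the task is created.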
func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) error {
	bdir := c.bundleDir(id)
	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")

	newOpts := []containerd.NewContainerOpts{
		containerd.WithSpec(ociSpec),
		containerd.WithRuntime(shim, runtimeOptions),
		WithBundle(bdir, ociSpec),
	}
	opts = append(opts, newOpts...)

	_, err := c.client.NewContainer(ctx, id, opts...)
	if err != nil {
		if containerderrors.IsAlreadyExists(err) {
			return errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
		}
		return wrapError(err)
	}
	if x, ok := runtimeOptions.(*v2runcoptions.Options); ok {
		c.v2runcoptionsMu.Lock()
		c.v2runcoptions[id] = *x
		c.v2runcoptionsMu.Unlock()
	}
	return nil
}

// Start creates and starts a task for the specified containerd id
func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
	ctr, err := c.getContainer(ctx, id)
	if err != nil {
		return -1, err
	}
	var (
		cp             *types.Descriptor
		t              containerd.Task
		rio            cio.IO
		stdinCloseSync = make(chan struct{})
	)

	if checkpointDir != "" {
		// write checkpoint to the content store
		tar := archive.Diff(ctx, "", checkpointDir)
		cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
		// remove the checkpoint when we're done
		defer func() {
			if cp != nil {
				err := c.client.ContentStore().Delete(context.Background(), cp.Digest)
				if err != nil {
					c.logger.WithError(err).WithFields(logrus.Fields{
						"ref":    checkpointDir,
						"digest": cp.Digest,
					}).Warnf("failed to delete temporary checkpoint entry")
				}
			}
		}()
		if err := tar.Close(); err != nil {
			return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
		}
		if err != nil {
			return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
		}
	}

	spec, err := ctr.Spec(ctx)
	if err != nil {
		return -1, errors.Wrap(err, "failed to retrieve spec")
	}
	labels, err := ctr.Labels(ctx)
	if err != nil {
		return -1, errors.Wrap(err, "failed to retrieve labels")
	}
	bundle := labels[DockerContainerBundlePath]
	uid, gid := getSpecUser(spec)

	taskOpts := []containerd.NewTaskOpts{
		func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
			info.Checkpoint = cp
			return nil
		},
	}

	if runtime.GOOS != "windows" {
		taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
			c.v2runcoptionsMu.Lock()
			opts, ok := c.v2runcoptions[id]
			c.v2runcoptionsMu.Unlock()
			if ok {
				opts.IoUid = uint32(uid)
				opts.IoGid = uint32(gid)
				info.Options = &opts
			} else {
				info.Options = &runctypes.CreateOptions{
					IoUid:       uint32(uid),
					IoGid:       uint32(gid),
					NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
				}
			}
			return nil
		})
	} else {
		taskOpts = append(taskOpts, withLogLevel(c.logger.Level))
	}
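	// Create the task with its IO via the callback below. stdinCloseSync is
	// closed only after NewTask returns, so the stdin-closer goroutine set up
	// in createIO cannot call CloseIO before the task is registered.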
	t, err = ctr.NewTask(ctx,
		func(id string) (cio.IO, error) {
			fifos := newFIFOSet(bundle, libcontainerdtypes.InitProcessName, withStdin, spec.Process.Terminal)

			rio, err = c.createIO(fifos, id, libcontainerdtypes.InitProcessName, stdinCloseSync, attachStdio)
			return rio, err
		},
		taskOpts...,
	)
	if err != nil {
		close(stdinCloseSync)
		if rio != nil {
			rio.Cancel()
			rio.Close()
		}
		return -1, wrapError(err)
	}

	// Signal c.createIO that it can call CloseIO
	close(stdinCloseSync)

	if err := t.Start(ctx); err != nil {
		if _, err := t.Delete(ctx); err != nil {
			c.logger.WithError(err).WithField("container", id).
				Error("failed to delete task after failed start")
		}
		return -1, wrapError(err)
	}

	return int(t.Pid()), nil
}

// Exec creates an exec process.
//
// The containerd client calls Exec to register the exec config in the shim
// side. When the client calls Start, the shim will create the stdin fifo if
// needed. For the container's main process, however, the stdin fifo is
// created in Create, not in the Start call. The stdinCloseSync channel should
// be closed after starting the exec process.
func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
	ctr, err := c.getContainer(ctx, containerID)
	if err != nil {
		return -1, err
	}
	t, err := ctr.Task(ctx, nil)
	if err != nil {
		if containerderrors.IsNotFound(err) {
			return -1, errors.WithStack(errdefs.InvalidParameter(errors.New("container is not running")))
		}
		return -1, wrapError(err)
	}

	var (
		p              containerd.Process
		rio            cio.IO
		stdinCloseSync = make(chan struct{})
	)

	labels, err := ctr.Labels(ctx)
	if err != nil {
		return -1, wrapError(err)
	}

	fifos := newFIFOSet(labels[DockerContainerBundlePath], processID, withStdin, spec.Terminal)

	defer func() {
		if err != nil {
			if rio != nil {
				rio.Cancel()
				rio.Close()
			}
		}
	}()

	p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
		rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio)
		return rio, err
	})
	if err != nil {
		close(stdinCloseSync)
		if containerderrors.IsAlreadyExists(err) {
			return -1, errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
		}
		return -1, wrapError(err)
	}

	// Signal c.createIO that it can call CloseIO
	//
	// the stdin of the exec process will be created after p.Start in containerd
	defer close(stdinCloseSync)

	if err = p.Start(ctx); err != nil {
		// use a new context for cleanup because the old one may have been cancelled by the
		// user, but leave a timeout to make sure we are not waiting forever if containerd
		// is unresponsive, and to work around fifo-cancelling issues in older containerd-shims
		ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
		defer cancel()
		p.Delete(ctx)
		return -1, wrapError(err)
	}
	return int(p.Pid()), nil
}
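// SignalProcess sends the given signal to the container's init process, or to
// the exec process identified by processID.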
func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error {
	p, err := c.getProcess(ctx, containerID, processID)
	if err != nil {
		return err
	}
	return wrapError(p.Kill(ctx, syscall.Signal(signal)))
}

// ResizeTerminal resizes the terminal of the given process to width x height.
func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
	p, err := c.getProcess(ctx, containerID, processID)
	if err != nil {
		return err
	}

	return p.Resize(ctx, uint32(width), uint32(height))
}

// CloseStdin closes the stdin of the given process.
func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
	p, err := c.getProcess(ctx, containerID, processID)
	if err != nil {
		return err
	}

	return p.CloseIO(ctx, containerd.WithStdinCloser)
}

// Pause suspends the container's init task.
func (c *client) Pause(ctx context.Context, containerID string) error {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return err
	}

	return wrapError(p.(containerd.Task).Pause(ctx))
}

// Resume resumes a previously paused container.
func (c *client) Resume(ctx context.Context, containerID string) error {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return err
	}

	return p.(containerd.Task).Resume(ctx)
}

// Stats returns the current metrics for the container's init task.
func (c *client) Stats(ctx context.Context, containerID string) (*libcontainerdtypes.Stats, error) {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return nil, err
	}

	m, err := p.(containerd.Task).Metrics(ctx)
	if err != nil {
		return nil, err
	}

	v, err := typeurl.UnmarshalAny(m.Data)
	if err != nil {
		return nil, err
	}
	return libcontainerdtypes.InterfaceToStats(m.Timestamp, v), nil
}

// ListPids returns the PIDs of all processes running inside the container.
func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return nil, err
	}

	pis, err := p.(containerd.Task).Pids(ctx)
	if err != nil {
		return nil, err
	}

	var pids []uint32
	for _, i := range pis {
		pids = append(pids, i.Pid)
	}

	return pids, nil
}

// Summary returns details about the processes running inside the container.
func (c *client) Summary(ctx context.Context, containerID string) ([]libcontainerdtypes.Summary, error) {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return nil, err
	}

	pis, err := p.(containerd.Task).Pids(ctx)
	if err != nil {
		return nil, err
	}

	var infos []libcontainerdtypes.Summary
	for _, pi := range pis {
		i, err := typeurl.UnmarshalAny(pi.Info)
		if err != nil {
			return nil, errors.Wrap(err, "unable to decode process details")
		}
		s, err := summaryFromInterface(i)
		if err != nil {
			return nil, err
		}
		infos = append(infos, *s)
	}

	return infos, nil
}

type restoredProcess struct {
	p containerd.Process
}

func (p *restoredProcess) Delete(ctx context.Context) (uint32, time.Time, error) {
	if p.p == nil {
		return 255, time.Now(), nil
	}
	status, err := p.p.Delete(ctx)
	if err != nil {
		return 255, time.Now(), nil
	}
	return status.ExitCode(), status.ExitTime(), nil
}

// DeleteTask deletes the container's init task and returns its exit code and
// exit time. A missing or undeletable task is reported as exit code 255.
func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return 255, time.Now(), nil
	}

	status, err := p.Delete(ctx)
	if err != nil {
		return 255, time.Now(), nil
	}
	return status.ExitCode(), status.ExitTime(), nil
}

// Delete removes the containerd container, drops the cached per-container
// state, and deletes the bundle directory unless LIBCONTAINERD_NOCLEAN=1 is set.
func (c *client) Delete(ctx context.Context, containerID string) error {
	ctr, err := c.getContainer(ctx, containerID)
	if err != nil {
		return err
	}
	labels, err := ctr.Labels(ctx)
	if err != nil {
		return err
	}
	bundle := labels[DockerContainerBundlePath]
	if err := ctr.Delete(ctx); err != nil {
		return wrapError(err)
	}
	c.oomMu.Lock()
	delete(c.oom, containerID)
	c.oomMu.Unlock()
	c.v2runcoptionsMu.Lock()
	delete(c.v2runcoptions, containerID)
	c.v2runcoptionsMu.Unlock()
	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
		if err := os.RemoveAll(bundle); err != nil {
			c.logger.WithError(err).WithFields(logrus.Fields{
				"container": containerID,
				"bundle":    bundle,
			}).Error("failed to remove state dir")
		}
	}
	return nil
}
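// Status returns the status of the container's init task.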
func (c *client) Status(ctx context.Context, containerID string) (containerd.ProcessStatus, error) {
	t, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return containerd.Unknown, err
	}
	s, err := t.Status(ctx)
	if err != nil {
		return containerd.Unknown, wrapError(err)
	}
	return s.Status, nil
}

// getCheckpointOptions returns a CheckpointTaskOpts that sets the Exit flag on
// the runtime-appropriate checkpoint options for the given container.
func (c *client) getCheckpointOptions(id string, exit bool) containerd.CheckpointTaskOpts {
	return func(r *containerd.CheckpointTaskInfo) error {
		if r.Options == nil {
			c.v2runcoptionsMu.Lock()
			_, isV2 := c.v2runcoptions[id]
			c.v2runcoptionsMu.Unlock()

			if isV2 {
				r.Options = &v2runcoptions.CheckpointOptions{Exit: exit}
			} else {
				r.Options = &runctypes.CheckpointOptions{Exit: exit}
			}
			return nil
		}

		switch opts := r.Options.(type) {
		case *v2runcoptions.CheckpointOptions:
			opts.Exit = exit
		case *runctypes.CheckpointOptions:
			opts.Exit = exit
		}

		return nil
	}
}

// CreateCheckpoint checkpoints the container's init task and unpacks the
// checkpoint data into checkpointDir.
func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
	if err != nil {
		return err
	}

	opts := []containerd.CheckpointTaskOpts{c.getCheckpointOptions(containerID, exit)}
	img, err := p.(containerd.Task).Checkpoint(ctx, opts...)
	if err != nil {
		return wrapError(err)
	}
	// Whatever happens, delete the checkpoint from containerd
	defer func() {
		err := c.client.ImageService().Delete(context.Background(), img.Name())
		if err != nil {
			c.logger.WithError(err).WithField("digest", img.Target().Digest).
				Warnf("failed to delete checkpoint image")
		}
	}()

	b, err := content.ReadBlob(ctx, c.client.ContentStore(), img.Target())
	if err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to retrieve checkpoint data"))
	}
	var index v1.Index
	if err := json.Unmarshal(b, &index); err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to decode checkpoint data"))
	}

	var cpDesc *v1.Descriptor
	for _, m := range index.Manifests {
		m := m // copy the loop variable before taking its address
		if m.MediaType == images.MediaTypeContainerd1Checkpoint {
			cpDesc = &m
			break
		}
	}
	if cpDesc == nil {
		// err is nil at this point, so wrap a fresh error rather than the stale one
		return errdefs.System(errors.New("invalid checkpoint"))
	}

	rat, err := c.client.ContentStore().ReaderAt(ctx, *cpDesc)
	if err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to get checkpoint reader"))
	}
	defer rat.Close()
	_, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat))
	if err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to read checkpoint reader"))
	}

	return nil
}

// getContainer loads the containerd container for the given id.
func (c *client) getContainer(ctx context.Context, id string) (containerd.Container, error) {
	ctr, err := c.client.LoadContainer(ctx, id)
	if err != nil {
		if containerderrors.IsNotFound(err) {
			return nil, errors.WithStack(errdefs.NotFound(errors.New("no such container")))
		}
		return nil, wrapError(err)
	}
	return ctr, nil
}
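// getProcess returns the container's init task when processID is the init
// process name, or loads the matching exec process otherwise.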
func (c *client) getProcess(ctx context.Context, containerID, processID string) (containerd.Process, error) {
	ctr, err := c.getContainer(ctx, containerID)
	if err != nil {
		return nil, err
	}
	t, err := ctr.Task(ctx, nil)
	if err != nil {
		if containerderrors.IsNotFound(err) {
			return nil, errors.WithStack(errdefs.NotFound(errors.New("container is not running")))
		}
		return nil, wrapError(err)
	}
	if processID == libcontainerdtypes.InitProcessName {
		return t, nil
	}
	p, err := t.LoadProcess(ctx, processID, nil)
	if err != nil {
		if containerderrors.IsNotFound(err) {
			return nil, errors.WithStack(errdefs.NotFound(errors.New("no such exec")))
		}
		return nil, wrapError(err)
	}
	return p, nil
}

// createIO creates the io to be used by a process.
// This needs to get a pointer to interface as upon closure the process may not
// have yet been registered.
func (c *client) createIO(fifos *cio.FIFOSet, containerID, processID string, stdinCloseSync chan struct{}, attachStdio libcontainerdtypes.StdioCallback) (cio.IO, error) {
	var (
		io  *cio.DirectIO
		err error
	)
	io, err = c.newDirectIO(context.Background(), fifos)
	if err != nil {
		return nil, err
	}

	if io.Stdin != nil {
		var (
			err       error
			stdinOnce sync.Once
		)
		pipe := io.Stdin
		io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
			stdinOnce.Do(func() {
				err = pipe.Close()
				// Do the rest in a new routine to avoid a deadlock if the
				// Exec/Start call failed.
				go func() {
					<-stdinCloseSync
					p, err := c.getProcess(context.Background(), containerID, processID)
					if err == nil {
						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
						if err != nil && strings.Contains(err.Error(), "transport is closing") {
							err = nil
						}
					}
				}()
			})
			return err
		})
	}

	rio, err := attachStdio(io)
	if err != nil {
		io.Cancel()
		io.Close()
	}
	return rio, err
}

// processEvent dispatches a containerd event to the backend via the
// per-container event queue. For exit events of exec processes it also cleans
// up the process and its fifos.
func (c *client) processEvent(ctx context.Context, et libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) {
	c.eventQ.Append(ei.ContainerID, func() {
		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
		if err != nil {
			c.logger.WithError(err).WithFields(logrus.Fields{
				"container":  ei.ContainerID,
				"event":      et,
				"event-info": ei,
			}).Error("failed to process event")
		}

		if et == libcontainerdtypes.EventExit && ei.ProcessID != ei.ContainerID {
			p, err := c.getProcess(ctx, ei.ContainerID, ei.ProcessID)
			if err != nil {
				c.logger.WithError(errors.New("no such process")).
					WithFields(logrus.Fields{
						"error":     err,
						"container": ei.ContainerID,
						"process":   ei.ProcessID,
					}).Error("exit event")
				return
			}

			ctr, err := c.getContainer(ctx, ei.ContainerID)
			if err != nil {
				c.logger.WithFields(logrus.Fields{
					"container": ei.ContainerID,
					"error":     err,
				}).Error("failed to find container")
			} else {
				labels, err := ctr.Labels(ctx)
				if err != nil {
					c.logger.WithFields(logrus.Fields{
						"container": ei.ContainerID,
						"error":     err,
					}).Error("failed to get container labels")
					return
				}
				newFIFOSet(labels[DockerContainerBundlePath], ei.ProcessID, true, false).Close()
			}
			_, err = p.Delete(context.Background())
			if err != nil {
				c.logger.WithError(err).WithFields(logrus.Fields{
					"container": ei.ContainerID,
					"process":   ei.ProcessID,
				}).Warn("failed to delete process")
			}
		}
	})
}
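// waitServe polls containerd until its API is ready to serve requests or ctx
// is cancelled. It reports whether containerd became ready.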
func (c *client) waitServe(ctx context.Context) bool {
	t := 100 * time.Millisecond
	delay := time.NewTimer(t)
	if !delay.Stop() {
		<-delay.C
	}
	defer delay.Stop()

	// `IsServing` will actually block until the service is ready.
	// However it can return early, so we'll loop with a delay to handle it.
	for {
		serving, err := c.client.IsServing(ctx)
		if err != nil {
			if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
				return false
			}
			logrus.WithError(err).Warn("Error while testing if containerd API is ready")
		}

		if serving {
			return true
		}

		delay.Reset(t)
		select {
		case <-ctx.Done():
			return false
		case <-delay.C:
		}
	}
}

// processEventStream subscribes to containerd's task events for the given
// namespace and translates them into libcontainerd events for the backend.
// If the stream breaks, it waits for containerd to become ready again and
// restarts itself.
func (c *client) processEventStream(ctx context.Context, ns string) {
	var (
		err error
		ev  *events.Envelope
		et  libcontainerdtypes.EventType
		ei  libcontainerdtypes.EventInfo
	)

	// Create a new context specifically for this subscription.
	// The context must be cancelled to cancel the subscription.
	// In cases where we have to restart event stream processing,
	// we'll need the original context b/c this one will be cancelled
	subCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Filter on both namespace *and* topic. To create an "and" filter,
	// this must be a single, comma-separated string
	eventStream, errC := c.client.EventService().Subscribe(subCtx, "namespace=="+ns+",topic~=|^/tasks/|")

	c.logger.Debug("processing event stream")

	for {
		var oomKilled bool
		select {
		case err = <-errC:
			if err != nil {
				errStatus, ok := status.FromError(err)
				if !ok || errStatus.Code() != codes.Canceled {
					c.logger.WithError(err).Error("Failed to get event")
					c.logger.Info("Waiting for containerd to be ready to restart event processing")
					if c.waitServe(ctx) {
						go c.processEventStream(ctx, ns)
						return
					}
				}
				c.logger.WithError(ctx.Err()).Info("stopping event stream following graceful shutdown")
			}
			return
		case ev = <-eventStream:
			if ev.Event == nil {
				c.logger.WithField("event", ev).Warn("invalid event")
				continue
			}

			v, err := typeurl.UnmarshalAny(ev.Event)
			if err != nil {
				c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
				continue
			}

			c.logger.WithField("topic", ev.Topic).Debug("event")

			switch t := v.(type) {
			case *apievents.TaskCreate:
				et = libcontainerdtypes.EventCreate
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ContainerID,
					Pid:         t.Pid,
				}
			case *apievents.TaskStart:
				et = libcontainerdtypes.EventStart
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ContainerID,
					Pid:         t.Pid,
				}
			case *apievents.TaskExit:
				et = libcontainerdtypes.EventExit
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ID,
					Pid:         t.Pid,
					ExitCode:    t.ExitStatus,
					ExitedAt:    t.ExitedAt,
				}
			case *apievents.TaskOOM:
				et = libcontainerdtypes.EventOOM
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
					OOMKilled:   true,
				}
				oomKilled = true
			case *apievents.TaskExecAdded:
				et = libcontainerdtypes.EventExecAdded
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ExecID,
				}
			case *apievents.TaskExecStarted:
				et = libcontainerdtypes.EventExecStarted
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ExecID,
					Pid:         t.Pid,
				}
			case *apievents.TaskPaused:
				et = libcontainerdtypes.EventPaused
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
				}
			case *apievents.TaskResumed:
				et = libcontainerdtypes.EventResumed
				ei = libcontainerdtypes.EventInfo{
					ContainerID: t.ContainerID,
				}
			case *apievents.TaskDelete:
				c.logger.WithFields(logrus.Fields{
					"topic":     ev.Topic,
					"type":      reflect.TypeOf(t),
					"container": t.ContainerID},
				).Info("ignoring event")
				continue
			default:
				c.logger.WithFields(logrus.Fields{
					"topic": ev.Topic,
					"type":  reflect.TypeOf(t)},
				).Info("ignoring event")
				continue
			}

			c.oomMu.Lock()
			if oomKilled {
				c.oom[ei.ContainerID] = true
			}
			ei.OOMKilled = c.oom[ei.ContainerID]
			c.oomMu.Unlock()

			c.processEvent(ctx, et, ei)
		}
	}
}
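// writeContent copies the data from r into the content store under the given
// ref and returns a descriptor for the written blob. The blob is labelled
// containerd.io/gc.root so containerd's garbage collector will not prune it
// while it is in use.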
func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
	writer, err := c.client.ContentStore().Writer(ctx, content.WithRef(ref))
	if err != nil {
		return nil, err
	}
	defer writer.Close()
	size, err := io.Copy(writer, r)
	if err != nil {
		return nil, err
	}
	labels := map[string]string{
		"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
	}
	if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil {
		return nil, err
	}
	return &types.Descriptor{
		MediaType: mediaType,
		Digest:    writer.Digest(),
		Size_:     size,
	}, nil
}

// bundleDir returns the path of the bundle directory for the given container id.
func (c *client) bundleDir(id string) string {
	return filepath.Join(c.stateDir, id)
}

// wrapError translates containerd and string-matched "not found" errors into
// errdefs.NotFound so callers can detect them uniformly.
func wrapError(err error) error {
	switch {
	case err == nil:
		return nil
	case containerderrors.IsNotFound(err):
		return errdefs.NotFound(err)
	}

	msg := err.Error()
	for _, s := range []string{"container does not exist", "not found", "no such container"} {
		if strings.Contains(msg, s) {
			return errdefs.NotFound(err)
		}
	}
	return err
}