github.com/containerd/Containerd@v1.4.13/runtime/v2/runc/v1/service.go (about) 1 // +build linux 2 3 /* 4 Copyright The containerd Authors. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17 */ 18 19 package v1 20 21 import ( 22 "context" 23 "io/ioutil" 24 "os" 25 "os/exec" 26 "path/filepath" 27 "sync" 28 "syscall" 29 "time" 30 31 "github.com/containerd/cgroups" 32 eventstypes "github.com/containerd/containerd/api/events" 33 "github.com/containerd/containerd/api/types/task" 34 "github.com/containerd/containerd/errdefs" 35 "github.com/containerd/containerd/mount" 36 "github.com/containerd/containerd/namespaces" 37 "github.com/containerd/containerd/pkg/oom" 38 oomv1 "github.com/containerd/containerd/pkg/oom/v1" 39 "github.com/containerd/containerd/pkg/process" 40 "github.com/containerd/containerd/pkg/stdio" 41 "github.com/containerd/containerd/runtime/v2/runc" 42 "github.com/containerd/containerd/runtime/v2/runc/options" 43 "github.com/containerd/containerd/runtime/v2/shim" 44 taskAPI "github.com/containerd/containerd/runtime/v2/task" 45 "github.com/containerd/containerd/sys/reaper" 46 runcC "github.com/containerd/go-runc" 47 "github.com/containerd/typeurl" 48 "github.com/gogo/protobuf/proto" 49 ptypes "github.com/gogo/protobuf/types" 50 "github.com/pkg/errors" 51 "github.com/sirupsen/logrus" 52 "golang.org/x/sys/unix" 53 ) 54 55 var ( 56 _ = (taskAPI.TaskService)(&service{}) 57 empty = &ptypes.Empty{} 58 ) 59 60 // New returns a new shim service that can be used via GRPC 61 func New(ctx context.Context, id string, publisher shim.Publisher, shutdown func()) (shim.Shim, error) { 62 ep, err := oomv1.New(publisher) 63 if err != nil { 64 return nil, err 65 } 66 go ep.Run(ctx) 67 s := &service{ 68 id: id, 69 context: ctx, 70 events: make(chan interface{}, 128), 71 ec: reaper.Default.Subscribe(), 72 ep: ep, 73 cancel: shutdown, 74 } 75 go s.processExits() 76 runcC.Monitor = reaper.Default 77 if err := s.initPlatform(); err != nil { 78 shutdown() 79 return nil, errors.Wrap(err, "failed to initialized platform behavior") 80 } 81 go s.forward(ctx, publisher) 82 return s, nil 83 } 84 85 // service is the shim implementation of a remote shim over GRPC 86 type service struct { 87 mu sync.Mutex 88 eventSendMu sync.Mutex 89 90 context context.Context 91 events chan interface{} 92 platform stdio.Platform 93 ec chan runcC.Exit 94 ep oom.Watcher 95 96 id string 97 container *runc.Container 98 99 cancel func() 100 } 101 102 func newCommand(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (*exec.Cmd, error) { 103 ns, err := namespaces.NamespaceRequired(ctx) 104 if err != nil { 105 return nil, err 106 } 107 self, err := os.Executable() 108 if err != nil { 109 return nil, err 110 } 111 cwd, err := os.Getwd() 112 if err != nil { 113 return nil, err 114 } 115 args := []string{ 116 "-namespace", ns, 117 "-id", id, 118 "-address", containerdAddress, 119 } 120 cmd := exec.Command(self, args...) 121 cmd.Dir = cwd 122 cmd.Env = append(os.Environ(), "GOMAXPROCS=2") 123 cmd.SysProcAttr = &syscall.SysProcAttr{ 124 Setpgid: true, 125 } 126 return cmd, nil 127 } 128 129 func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (_ string, retErr error) { 130 cmd, err := newCommand(ctx, id, containerdBinary, containerdAddress, containerdTTRPCAddress) 131 if err != nil { 132 return "", err 133 } 134 address, err := shim.SocketAddress(ctx, containerdAddress, id) 135 if err != nil { 136 return "", err 137 } 138 socket, err := shim.NewSocket(address) 139 if err != nil { 140 if !shim.SocketEaddrinuse(err) { 141 return "", err 142 } 143 if err := shim.RemoveSocket(address); err != nil { 144 return "", errors.Wrap(err, "remove already used socket") 145 } 146 if socket, err = shim.NewSocket(address); err != nil { 147 return "", err 148 } 149 } 150 defer func() { 151 if retErr != nil { 152 socket.Close() 153 _ = shim.RemoveSocket(address) 154 } 155 }() 156 // make sure that reexec shim-v2 binary use the value if need 157 if err := shim.WriteAddress("address", address); err != nil { 158 return "", err 159 } 160 161 f, err := socket.File() 162 if err != nil { 163 return "", err 164 } 165 166 cmd.ExtraFiles = append(cmd.ExtraFiles, f) 167 168 if err := cmd.Start(); err != nil { 169 f.Close() 170 return "", err 171 } 172 defer func() { 173 if retErr != nil { 174 cmd.Process.Kill() 175 } 176 }() 177 // make sure to wait after start 178 go cmd.Wait() 179 if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { 180 return "", err 181 } 182 if data, err := ioutil.ReadAll(os.Stdin); err == nil { 183 if len(data) > 0 { 184 var any ptypes.Any 185 if err := proto.Unmarshal(data, &any); err != nil { 186 return "", err 187 } 188 v, err := typeurl.UnmarshalAny(&any) 189 if err != nil { 190 return "", err 191 } 192 if opts, ok := v.(*options.Options); ok { 193 if opts.ShimCgroup != "" { 194 cg, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(opts.ShimCgroup)) 195 if err != nil { 196 return "", errors.Wrapf(err, "failed to load cgroup %s", opts.ShimCgroup) 197 } 198 if err := cg.Add(cgroups.Process{ 199 Pid: cmd.Process.Pid, 200 }); err != nil { 201 return "", errors.Wrapf(err, "failed to join cgroup %s", opts.ShimCgroup) 202 } 203 } 204 } 205 } 206 } 207 if err := shim.AdjustOOMScore(cmd.Process.Pid); err != nil { 208 return "", errors.Wrap(err, "failed to adjust OOM score for shim") 209 } 210 return address, nil 211 } 212 213 func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { 214 if address, err := shim.ReadAddress("address"); err == nil { 215 if err = shim.RemoveSocket(address); err != nil { 216 return nil, err 217 } 218 } 219 220 path, err := os.Getwd() 221 if err != nil { 222 return nil, err 223 } 224 ns, err := namespaces.NamespaceRequired(ctx) 225 if err != nil { 226 return nil, err 227 } 228 runtime, err := runc.ReadRuntime(path) 229 if err != nil { 230 return nil, err 231 } 232 opts, err := runc.ReadOptions(path) 233 if err != nil { 234 return nil, err 235 } 236 root := process.RuncRoot 237 if opts != nil && opts.Root != "" { 238 root = opts.Root 239 } 240 241 r := process.NewRunc(root, path, ns, runtime, "", false) 242 if err := r.Delete(ctx, s.id, &runcC.DeleteOpts{ 243 Force: true, 244 }); err != nil { 245 logrus.WithError(err).Warn("failed to remove runc container") 246 } 247 if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil { 248 logrus.WithError(err).Warn("failed to cleanup rootfs mount") 249 } 250 return &taskAPI.DeleteResponse{ 251 ExitedAt: time.Now(), 252 ExitStatus: 128 + uint32(unix.SIGKILL), 253 }, nil 254 } 255 256 // Create a new initial process and container with the underlying OCI runtime 257 func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { 258 s.mu.Lock() 259 defer s.mu.Unlock() 260 261 container, err := runc.NewContainer(ctx, s.platform, r) 262 if err != nil { 263 return nil, err 264 } 265 266 s.container = container 267 268 s.send(&eventstypes.TaskCreate{ 269 ContainerID: r.ID, 270 Bundle: r.Bundle, 271 Rootfs: r.Rootfs, 272 IO: &eventstypes.TaskIO{ 273 Stdin: r.Stdin, 274 Stdout: r.Stdout, 275 Stderr: r.Stderr, 276 Terminal: r.Terminal, 277 }, 278 Checkpoint: r.Checkpoint, 279 Pid: uint32(container.Pid()), 280 }) 281 282 return &taskAPI.CreateTaskResponse{ 283 Pid: uint32(container.Pid()), 284 }, nil 285 } 286 287 // Start a process 288 func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { 289 container, err := s.getContainer() 290 if err != nil { 291 return nil, err 292 } 293 294 // hold the send lock so that the start events are sent before any exit events in the error case 295 s.eventSendMu.Lock() 296 p, err := container.Start(ctx, r) 297 if err != nil { 298 s.eventSendMu.Unlock() 299 return nil, errdefs.ToGRPC(err) 300 } 301 switch r.ExecID { 302 case "": 303 if cg, ok := container.Cgroup().(cgroups.Cgroup); ok { 304 if err := s.ep.Add(container.ID, cg); err != nil { 305 logrus.WithError(err).Error("add cg to OOM monitor") 306 } 307 } else { 308 logrus.WithError(errdefs.ErrNotImplemented).Error("add cg to OOM monitor") 309 } 310 s.send(&eventstypes.TaskStart{ 311 ContainerID: container.ID, 312 Pid: uint32(p.Pid()), 313 }) 314 default: 315 s.send(&eventstypes.TaskExecStarted{ 316 ContainerID: container.ID, 317 ExecID: r.ExecID, 318 Pid: uint32(p.Pid()), 319 }) 320 } 321 s.eventSendMu.Unlock() 322 return &taskAPI.StartResponse{ 323 Pid: uint32(p.Pid()), 324 }, nil 325 } 326 327 // Delete the initial process and container 328 func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { 329 container, err := s.getContainer() 330 if err != nil { 331 return nil, err 332 } 333 p, err := container.Delete(ctx, r) 334 if err != nil { 335 return nil, errdefs.ToGRPC(err) 336 } 337 // if we deleted our init task, close the platform and send the task delete event 338 if r.ExecID == "" { 339 if s.platform != nil { 340 s.platform.Close() 341 } 342 s.send(&eventstypes.TaskDelete{ 343 ContainerID: container.ID, 344 Pid: uint32(p.Pid()), 345 ExitStatus: uint32(p.ExitStatus()), 346 ExitedAt: p.ExitedAt(), 347 }) 348 } 349 return &taskAPI.DeleteResponse{ 350 ExitStatus: uint32(p.ExitStatus()), 351 ExitedAt: p.ExitedAt(), 352 Pid: uint32(p.Pid()), 353 }, nil 354 } 355 356 // Exec an additional process inside the container 357 func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) { 358 container, err := s.getContainer() 359 if err != nil { 360 return nil, err 361 } 362 ok, cancel := container.ReserveProcess(r.ExecID) 363 if !ok { 364 return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) 365 } 366 process, err := container.Exec(ctx, r) 367 if err != nil { 368 cancel() 369 return nil, errdefs.ToGRPC(err) 370 } 371 372 s.send(&eventstypes.TaskExecAdded{ 373 ContainerID: s.container.ID, 374 ExecID: process.ID(), 375 }) 376 return empty, nil 377 } 378 379 // ResizePty of a process 380 func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) { 381 container, err := s.getContainer() 382 if err != nil { 383 return nil, err 384 } 385 if err := container.ResizePty(ctx, r); err != nil { 386 return nil, errdefs.ToGRPC(err) 387 } 388 return empty, nil 389 } 390 391 // State returns runtime state information for a process 392 func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { 393 p, err := s.getProcess(r.ExecID) 394 if err != nil { 395 return nil, err 396 } 397 st, err := p.Status(ctx) 398 if err != nil { 399 return nil, err 400 } 401 status := task.StatusUnknown 402 switch st { 403 case "created": 404 status = task.StatusCreated 405 case "running": 406 status = task.StatusRunning 407 case "stopped": 408 status = task.StatusStopped 409 case "paused": 410 status = task.StatusPaused 411 case "pausing": 412 status = task.StatusPausing 413 } 414 sio := p.Stdio() 415 return &taskAPI.StateResponse{ 416 ID: p.ID(), 417 Bundle: s.container.Bundle, 418 Pid: uint32(p.Pid()), 419 Status: status, 420 Stdin: sio.Stdin, 421 Stdout: sio.Stdout, 422 Stderr: sio.Stderr, 423 Terminal: sio.Terminal, 424 ExitStatus: uint32(p.ExitStatus()), 425 ExitedAt: p.ExitedAt(), 426 }, nil 427 } 428 429 // Pause the container 430 func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) { 431 container, err := s.getContainer() 432 if err != nil { 433 return nil, err 434 } 435 if err := container.Pause(ctx); err != nil { 436 return nil, errdefs.ToGRPC(err) 437 } 438 s.send(&eventstypes.TaskPaused{ 439 ContainerID: container.ID, 440 }) 441 return empty, nil 442 } 443 444 // Resume the container 445 func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) { 446 container, err := s.getContainer() 447 if err != nil { 448 return nil, err 449 } 450 if err := container.Resume(ctx); err != nil { 451 return nil, errdefs.ToGRPC(err) 452 } 453 s.send(&eventstypes.TaskResumed{ 454 ContainerID: container.ID, 455 }) 456 return empty, nil 457 } 458 459 // Kill a process with the provided signal 460 func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) { 461 container, err := s.getContainer() 462 if err != nil { 463 return nil, err 464 } 465 if err := container.Kill(ctx, r); err != nil { 466 return nil, errdefs.ToGRPC(err) 467 } 468 return empty, nil 469 } 470 471 // Pids returns all pids inside the container 472 func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { 473 container, err := s.getContainer() 474 if err != nil { 475 return nil, err 476 } 477 pids, err := s.getContainerPids(ctx, r.ID) 478 if err != nil { 479 return nil, errdefs.ToGRPC(err) 480 } 481 var processes []*task.ProcessInfo 482 for _, pid := range pids { 483 pInfo := task.ProcessInfo{ 484 Pid: pid, 485 } 486 for _, p := range container.ExecdProcesses() { 487 if p.Pid() == int(pid) { 488 d := &options.ProcessDetails{ 489 ExecID: p.ID(), 490 } 491 a, err := typeurl.MarshalAny(d) 492 if err != nil { 493 return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) 494 } 495 pInfo.Info = a 496 break 497 } 498 } 499 processes = append(processes, &pInfo) 500 } 501 return &taskAPI.PidsResponse{ 502 Processes: processes, 503 }, nil 504 } 505 506 // CloseIO of a process 507 func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) { 508 container, err := s.getContainer() 509 if err != nil { 510 return nil, err 511 } 512 if err := container.CloseIO(ctx, r); err != nil { 513 return nil, err 514 } 515 return empty, nil 516 } 517 518 // Checkpoint the container 519 func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) { 520 container, err := s.getContainer() 521 if err != nil { 522 return nil, err 523 } 524 if err := container.Checkpoint(ctx, r); err != nil { 525 return nil, errdefs.ToGRPC(err) 526 } 527 return empty, nil 528 } 529 530 // Update a running container 531 func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) { 532 container, err := s.getContainer() 533 if err != nil { 534 return nil, err 535 } 536 if err := container.Update(ctx, r); err != nil { 537 return nil, errdefs.ToGRPC(err) 538 } 539 return empty, nil 540 } 541 542 // Wait for a process to exit 543 func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { 544 container, err := s.getContainer() 545 if err != nil { 546 return nil, err 547 } 548 p, err := container.Process(r.ExecID) 549 if err != nil { 550 return nil, errdefs.ToGRPC(err) 551 } 552 p.Wait() 553 554 return &taskAPI.WaitResponse{ 555 ExitStatus: uint32(p.ExitStatus()), 556 ExitedAt: p.ExitedAt(), 557 }, nil 558 } 559 560 // Connect returns shim information such as the shim's pid 561 func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { 562 var pid int 563 if s.container != nil { 564 pid = s.container.Pid() 565 } 566 return &taskAPI.ConnectResponse{ 567 ShimPid: uint32(os.Getpid()), 568 TaskPid: uint32(pid), 569 }, nil 570 } 571 572 func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) { 573 // please make sure that temporary resource has been cleanup 574 // before shutdown service. 575 s.cancel() 576 close(s.events) 577 return empty, nil 578 } 579 580 func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { 581 cgx := s.container.Cgroup() 582 if cgx == nil { 583 return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "cgroup does not exist") 584 } 585 cg, ok := cgx.(cgroups.Cgroup) 586 if !ok { 587 return nil, errdefs.ToGRPCf(errdefs.ErrNotImplemented, "cgroup v2 not implemented for Stats") 588 } 589 if cg == nil { 590 return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "cgroup does not exist") 591 } 592 stats, err := cg.Stat(cgroups.IgnoreNotExist) 593 if err != nil { 594 return nil, err 595 } 596 data, err := typeurl.MarshalAny(stats) 597 if err != nil { 598 return nil, err 599 } 600 return &taskAPI.StatsResponse{ 601 Stats: data, 602 }, nil 603 } 604 605 func (s *service) processExits() { 606 for e := range s.ec { 607 s.checkProcesses(e) 608 } 609 } 610 611 func (s *service) send(evt interface{}) { 612 s.events <- evt 613 } 614 615 func (s *service) sendL(evt interface{}) { 616 s.eventSendMu.Lock() 617 s.events <- evt 618 s.eventSendMu.Unlock() 619 } 620 621 func (s *service) checkProcesses(e runcC.Exit) { 622 container, err := s.getContainer() 623 if err != nil { 624 return 625 } 626 627 for _, p := range container.All() { 628 if p.Pid() == e.Pid { 629 if runc.ShouldKillAllOnExit(s.context, container.Bundle) { 630 if ip, ok := p.(*process.Init); ok { 631 // Ensure all children are killed 632 if err := ip.KillAll(s.context); err != nil { 633 logrus.WithError(err).WithField("id", ip.ID()). 634 Error("failed to kill init's children") 635 } 636 } 637 } 638 p.SetExited(e.Status) 639 s.sendL(&eventstypes.TaskExit{ 640 ContainerID: container.ID, 641 ID: p.ID(), 642 Pid: uint32(e.Pid), 643 ExitStatus: uint32(e.Status), 644 ExitedAt: p.ExitedAt(), 645 }) 646 return 647 } 648 } 649 } 650 651 func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { 652 p, err := s.container.Process("") 653 if err != nil { 654 return nil, errdefs.ToGRPC(err) 655 } 656 ps, err := p.(*process.Init).Runtime().Ps(ctx, id) 657 if err != nil { 658 return nil, err 659 } 660 pids := make([]uint32, 0, len(ps)) 661 for _, pid := range ps { 662 pids = append(pids, uint32(pid)) 663 } 664 return pids, nil 665 } 666 667 func (s *service) forward(ctx context.Context, publisher shim.Publisher) { 668 ns, _ := namespaces.Namespace(ctx) 669 ctx = namespaces.WithNamespace(context.Background(), ns) 670 for e := range s.events { 671 ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 672 err := publisher.Publish(ctx, runc.GetTopic(e), e) 673 cancel() 674 if err != nil { 675 logrus.WithError(err).Error("post event") 676 } 677 } 678 publisher.Close() 679 } 680 681 func (s *service) getContainer() (*runc.Container, error) { 682 s.mu.Lock() 683 container := s.container 684 s.mu.Unlock() 685 if container == nil { 686 return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "container not created") 687 } 688 return container, nil 689 } 690 691 func (s *service) getProcess(execID string) (process.Process, error) { 692 container, err := s.getContainer() 693 if err != nil { 694 return nil, err 695 } 696 p, err := container.Process(execID) 697 if err != nil { 698 return nil, errdefs.ToGRPC(err) 699 } 700 return p, nil 701 } 702 703 // initialize a single epoll fd to manage our consoles. `initPlatform` should 704 // only be called once. 705 func (s *service) initPlatform() error { 706 if s.platform != nil { 707 return nil 708 } 709 p, err := runc.NewPlatform() 710 if err != nil { 711 return err 712 } 713 s.platform = p 714 return nil 715 }