github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/shim/service.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package shim implements Containerd Shim v2 interface.
package shim

import (
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/BurntSushi/toml"
	"github.com/containerd/cgroups"
	cgroupsstats "github.com/containerd/cgroups/stats/v1"
	cgroupsv2 "github.com/containerd/cgroups/v2"
	"github.com/containerd/console"
	"github.com/containerd/containerd/api/events"
	"github.com/containerd/containerd/api/types/task"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/log"
	"github.com/containerd/containerd/mount"
	"github.com/containerd/containerd/namespaces"
	"github.com/containerd/containerd/pkg/process"
	"github.com/containerd/containerd/pkg/stdio"
	"github.com/containerd/containerd/runtime"
	"github.com/containerd/containerd/runtime/linux/runctypes"
	"github.com/containerd/containerd/runtime/v2/shim"
	taskAPI "github.com/containerd/containerd/runtime/v2/task"
	"github.com/containerd/containerd/sys/reaper"
	"github.com/containerd/typeurl"
	"github.com/gogo/protobuf/types"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"github.com/ttpreport/gvisor-ligolo/pkg/cleanup"
	v14 "github.com/ttpreport/gvisor-ligolo/pkg/shim/runtimeoptions/v14"
	"golang.org/x/sys/unix"

	"github.com/ttpreport/gvisor-ligolo/pkg/shim/proc"
	"github.com/ttpreport/gvisor-ligolo/pkg/shim/runsc"
	"github.com/ttpreport/gvisor-ligolo/pkg/shim/runtimeoptions"
	"github.com/ttpreport/gvisor-ligolo/pkg/shim/utils"
	"github.com/ttpreport/gvisor-ligolo/runsc/specutils"
)

var (
	empty   = &types.Empty{}
	bufPool = sync.Pool{
		New: func() any {
			buffer := make([]byte, 32<<10)
			return &buffer
		},
	}
)

const (
	// configFile is the default config file name. For containerd 1.2,
	// we assume that a config.toml should exist in the runtime root.
	configFile = "config.toml"

	// shimAddressPath is the relative path to a file that contains the address
	// to the shim UDS. See service.shimAddress.
	shimAddressPath = "address"

	cgroupParentAnnotation = "dev.gvisor.spec.cgroup-parent"
)

type oomPoller interface {
	io.Closer
	// add adds the `cg` cgroup to the OOM poller. `cg` is cgroups.Cgroup in v1
	// and `cgroupsv2.Manager` in v2.
	add(id string, cg any) error
	// run monitors OOM events and notifies the shim about them.
	run(ctx context.Context)
}

// New returns a new shim service that can be used via GRPC.
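// It selects a cgroup v1 or v2 OOM poller based on the host's cgroup mode,
// starts the goroutines that forward process exits and task events to
// containerd, and restores the shim socket address previously written by
// StartShim, if one exists.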
func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) {
	var opts shim.Opts
	if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil {
		opts = ctxOpts.(shim.Opts)
	}

	var (
		ep  oomPoller
		err error
	)
	if cgroups.Mode() == cgroups.Unified {
		ep, err = newOOMv2Poller(publisher)
	} else {
		ep, err = newOOMEpoller(publisher)
	}
	if err != nil {
		return nil, err
	}
	go ep.run(ctx)
	s := &service{
		id:             id,
		processes:      make(map[string]process.Process),
		events:         make(chan any, 128),
		ec:             proc.ExitCh,
		oomPoller:      ep,
		cancel:         cancel,
		genericOptions: opts,
	}
	go s.processExits(ctx)
	runsc.Monitor = &runsc.LogMonitor{Next: reaper.Default}
	if err := s.initPlatform(); err != nil {
		cancel()
		return nil, fmt.Errorf("failed to initialize platform behavior: %w", err)
	}
	go s.forward(ctx, publisher)

	if address, err := shim.ReadAddress(shimAddressPath); err == nil {
		s.shimAddress = address
	}

	return s, nil
}

// service is the shim implementation of a remote shim over GRPC. It runs in 2
// different modes:
//  1. Service: process runs for the lifetime of the container and receives
//     calls described in shimapi.TaskService interface.
//  2. Tool: process is short lived and runs only to perform the requested
//     operations and then exits. It implements the direct functions in
//     shim.Shim interface.
//
// When the service is running, it saves a json file with state information so
// that commands sent to the tool can load the state and perform the operation.
type service struct {
	mu sync.Mutex

	// id is the container ID.
	id string

	// bundle is a path provided by the caller on container creation. Stored
	// because it's needed in commands that don't receive the bundle in the
	// request.
	bundle string

	// task is the main process that is running the container.
	task *proc.Init

	// processes maps exec IDs to processes running through exec.
	processes map[string]process.Process

	events chan any

	// platform handles operations related to the console.
	platform stdio.Platform

	// genericOptions are options that come from the shim interface and are common
	// to all shims.
	genericOptions shim.Opts

	// opts are configuration options specific for this shim.
	opts options

	// ec gets notified whenever the container init process or an exec'd process
	// exits from inside the sandbox.
	ec chan proc.Exit

	// oomPoller monitors the sandbox's cgroup for OOM notifications.
	oomPoller oomPoller

	// cancel is a function that needs to be called before the shim stops. The
	// function is provided by the caller to New().
	cancel func()

	// shimAddress is the location of the UDS used to communicate to containerd.
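	// It is left empty when no address file was found at startup, in which
	// case shutdown skips removing the socket.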
	shimAddress string
}

var _ shim.Shim = (*service)(nil)

func (s *service) newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	self, err := os.Executable()
	if err != nil {
		return nil, err
	}
	cwd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	args := []string{
		"-namespace", ns,
		"-address", containerdAddress,
		"-publish-binary", containerdBinary,
	}
	if s.genericOptions.Debug {
		args = append(args, "-debug")
	}
	cmd := exec.Command(self, args...)
	cmd.Dir = cwd
	cmd.Env = append(os.Environ(), "GOMAXPROCS=2")
	cmd.SysProcAttr = &unix.SysProcAttr{
		Setpgid: true,
	}
	return cmd, nil
}

func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (string, error) {
	log.L.Debugf("StartShim, id: %s, binary: %q, address: %q", id, containerdBinary, containerdAddress)

	cmd, err := s.newCommand(ctx, containerdBinary, containerdAddress)
	if err != nil {
		return "", err
	}
	address, err := shim.SocketAddress(ctx, containerdAddress, id)
	if err != nil {
		return "", err
	}
	socket, err := shim.NewSocket(address)
	if err != nil {
		// The only time this would happen is if there is a bug and the socket
		// was not cleaned up in the shim's cleanup method, or we are using the
		// grouping functionality where the new process should run with the same
		// shim as an existing container.
		if !shim.SocketEaddrinuse(err) {
			return "", fmt.Errorf("create new shim socket: %w", err)
		}
		if shim.CanConnect(address) {
			if err := shim.WriteAddress(shimAddressPath, address); err != nil {
				return "", fmt.Errorf("write existing socket for shim: %w", err)
			}
			return address, nil
		}
		if err := shim.RemoveSocket(address); err != nil {
			return "", fmt.Errorf("remove pre-existing socket: %w", err)
		}
		if socket, err = shim.NewSocket(address); err != nil {
			return "", fmt.Errorf("try create new shim socket 2x: %w", err)
		}
	}
	cu := cleanup.Make(func() {
		socket.Close()
		_ = shim.RemoveSocket(address)
	})
	defer cu.Clean()

	f, err := socket.File()
	if err != nil {
		return "", err
	}

	cmd.ExtraFiles = append(cmd.ExtraFiles, f)

	log.L.Debugf("Executing: %q %s", cmd.Path, cmd.Args)
	if err := cmd.Start(); err != nil {
		f.Close()
		return "", err
	}
	cu.Add(func() { cmd.Process.Kill() })

	// Make sure to wait after start.
	go cmd.Wait()
	if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil {
		return "", err
	}
	if err := shim.WriteAddress(shimAddressPath, address); err != nil {
		return "", err
	}
	if err := shim.SetScore(cmd.Process.Pid); err != nil {
		return "", fmt.Errorf("failed to set OOM Score on shim: %w", err)
	}
	cu.Release()
	return address, nil
}

// Cleanup is called from another process (need to reload state) to stop the
// container and undo all operations done in Create().
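// Because it runs outside the original shim process, it rebuilds the runsc
// configuration from the state file saved in the bundle directory, force
// deletes the container, and unmounts the rootfs.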
func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) {
	log.L.Debugf("Cleanup")

	path, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	var st state
	if err := st.load(path); err != nil {
		return nil, err
	}
	r := proc.NewRunsc(s.opts.Root, path, ns, st.Options.BinaryName, nil, nil)

	if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{
		Force: true,
	}); err != nil {
		log.L.Infof("failed to remove runc container: %v", err)
	}
	if err := mount.UnmountAll(st.Rootfs, 0); err != nil {
		log.L.Infof("failed to cleanup rootfs mount: %v", err)
	}
	return &taskAPI.DeleteResponse{
		ExitedAt:   time.Now(),
		ExitStatus: 128 + uint32(unix.SIGKILL),
	}, nil
}

// Create creates a new initial process and container with the underlying OCI
// runtime.
func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) {
	resp, err := s.create(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Save the main task id and bundle to the shim for additional requests.
	s.id = r.ID
	s.bundle = r.Bundle

	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, fmt.Errorf("create namespace: %w", err)
	}

	// Read from root for now.
	if r.Options != nil {
		v, err := typeurl.UnmarshalAny(r.Options)
		if err != nil {
			return nil, err
		}
		var path string
		switch o := v.(type) {
		case *runctypes.CreateOptions: // containerd 1.2.x
			s.opts.IoUID = o.IoUid
			s.opts.IoGID = o.IoGid
			s.opts.ShimCgroup = o.ShimCgroup
		case *runctypes.RuncOptions: // containerd 1.2.x
			root := proc.RunscRoot
			if o.RuntimeRoot != "" {
				root = o.RuntimeRoot
			}

			s.opts.BinaryName = o.Runtime

			path = filepath.Join(root, configFile)
			if _, err := os.Stat(path); err != nil {
				if !os.IsNotExist(err) {
					return nil, fmt.Errorf("stat config file %q: %w", path, err)
				}
				// A config file in runtime root is not required.
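				// Clear the path so the defaults in s.opts are kept.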
				path = ""
			}
		case *runtimeoptions.Options: // containerd 1.5+
			if o.ConfigPath == "" {
				break
			}
			if o.TypeUrl != optionsType {
				return nil, fmt.Errorf("unsupported option type %q", o.TypeUrl)
			}
			path = o.ConfigPath
		case *v14.Options: // containerd 1.4-
			if o.ConfigPath == "" {
				break
			}
			if o.TypeUrl != optionsType {
				return nil, fmt.Errorf("unsupported option type %q", o.TypeUrl)
			}
			path = o.ConfigPath
		default:
			return nil, fmt.Errorf("unsupported option type %q", r.Options.TypeUrl)
		}
		if path != "" {
			if _, err = toml.DecodeFile(path, &s.opts); err != nil {
				return nil, fmt.Errorf("decode config file %q: %w", path, err)
			}
		}
	}

	if len(s.opts.LogLevel) != 0 {
		lvl, err := logrus.ParseLevel(s.opts.LogLevel)
		if err != nil {
			return nil, err
		}
		logrus.SetLevel(lvl)
	}
	for _, emittedPath := range runsc.EmittedPaths(s.id, s.opts.RunscConfig) {
		if err := os.MkdirAll(filepath.Dir(emittedPath), 0777); err != nil {
			return nil, fmt.Errorf("failed to create parent directories for file %v: %w", emittedPath, err)
		}
	}
	if len(s.opts.LogPath) != 0 {
		logPath := runsc.FormatShimLogPath(s.opts.LogPath, s.id)
		if err := os.MkdirAll(filepath.Dir(logPath), 0777); err != nil {
			return nil, fmt.Errorf("failed to create log dir: %w", err)
		}
		logFile, err := os.Create(logPath)
		if err != nil {
			return nil, fmt.Errorf("failed to create log file: %w", err)
		}
		log.L.Debugf("Starting mirror log at %q", logPath)
		std := logrus.StandardLogger()
		std.SetOutput(io.MultiWriter(std.Out, logFile))

		log.L.Debugf("Create shim")
		log.L.Debugf("***************************")
		log.L.Debugf("Args: %s", os.Args)
		log.L.Debugf("PID: %d", os.Getpid())
		log.L.Debugf("ID: %s", s.id)
		log.L.Debugf("Options: %+v", s.opts)
		log.L.Debugf("Bundle: %s", r.Bundle)
		log.L.Debugf("Terminal: %t", r.Terminal)
		log.L.Debugf("stdin: %s", r.Stdin)
		log.L.Debugf("stdout: %s", r.Stdout)
		log.L.Debugf("stderr: %s", r.Stderr)
		log.L.Debugf("***************************")
		if log.L.Logger.IsLevelEnabled(logrus.DebugLevel) {
			setDebugSigHandler()
		}
	}

	// Save state before any action is taken to ensure Cleanup() will have all
	// the information it needs to undo the operations.
	st := state{
		Rootfs:  filepath.Join(r.Bundle, "rootfs"),
		Options: s.opts,
	}
	if err := st.save(r.Bundle); err != nil {
		return nil, err
	}

	if err := os.Mkdir(st.Rootfs, 0711); err != nil && !os.IsExist(err) {
		return nil, err
	}

	// Convert from types.Mount to proc.Mount.
	var mounts []proc.Mount
	for _, m := range r.Rootfs {
		mounts = append(mounts, proc.Mount{
			Type:    m.Type,
			Source:  m.Source,
			Target:  m.Target,
			Options: m.Options,
		})
	}

	// Cleans up all mounts in case of failure.
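	// The cleanup is released only after the sandbox process has been created
	// and registered with the OOM poller below.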
	cu := cleanup.Make(func() {
		if err := mount.UnmountAll(st.Rootfs, 0); err != nil {
			log.L.Infof("failed to cleanup rootfs mount: %v", err)
		}
	})
	defer cu.Clean()
	for _, rm := range mounts {
		m := &mount.Mount{
			Type:    rm.Type,
			Source:  rm.Source,
			Options: rm.Options,
		}
		if err := m.Mount(st.Rootfs); err != nil {
			return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err)
		}
	}

	config := &proc.CreateConfig{
		ID:       r.ID,
		Bundle:   r.Bundle,
		Runtime:  s.opts.BinaryName,
		Rootfs:   mounts,
		Terminal: r.Terminal,
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
	}
	process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs)
	if err != nil {
		return nil, err
	}
	if err := process.Create(ctx, config); err != nil {
		return nil, err
	}

	// Set up OOM notification on the sandbox's cgroup. This is done on
	// sandbox create since the sandbox process will be created here.
	pid := process.Pid()
	if pid > 0 {
		var (
			cg  any
			err error
		)
		if cgroups.Mode() == cgroups.Unified {
			var cgPath string
			cgPath, err = cgroupsv2.PidGroupPath(pid)
			if err == nil {
				cg, err = cgroupsv2.LoadManager("/sys/fs/cgroup", cgPath)
			}
		} else {
			cg, err = cgroups.Load(cgroups.V1, cgroups.PidPath(pid))
		}
		if err != nil {
			return nil, fmt.Errorf("loading cgroup for %d: %w", pid, err)
		}
		if err := s.oomPoller.add(s.id, cg); err != nil {
			return nil, fmt.Errorf("add cg to OOM monitor: %w", err)
		}
	}

	// Success
	cu.Release()
	s.task = process
	return &taskAPI.CreateTaskResponse{
		Pid: uint32(process.Pid()),
	}, nil
}

// Start starts a process.
func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) {
	resp, err := s.start(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) {
	log.L.Debugf("Start, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if err := p.Start(ctx); err != nil {
		return nil, err
	}
	// TODO: Set the cgroup and oom notifications on restore.
	// https://github.com/google/gvisor-containerd-shim/issues/58
	return &taskAPI.StartResponse{
		Pid: uint32(p.Pid()),
	}, nil
}

// Delete deletes the initial process and container.
func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) {
	resp, err := s.delete(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) {
	log.L.Debugf("Delete, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if err := p.Delete(ctx); err != nil {
		return nil, err
	}
	if len(r.ExecID) != 0 {
		s.mu.Lock()
		delete(s.processes, r.ExecID)
		s.mu.Unlock()
	} else if s.platform != nil {
		s.platform.Close()
	}
	return &taskAPI.DeleteResponse{
		ExitStatus: uint32(p.ExitStatus()),
		ExitedAt:   p.ExitedAt(),
		Pid:        uint32(p.Pid()),
	}, nil
}

// Exec spawns an additional process inside the container.
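// The container init task must already exist; the new process is recorded in
// s.processes under its exec ID and is started later by a Start call that
// carries the same exec ID.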
func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*types.Empty, error) {
	resp, err := s.exec(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*types.Empty, error) {
	log.L.Debugf("Exec, id: %s, execID: %s", r.ID, r.ExecID)

	s.mu.Lock()
	p := s.processes[r.ExecID]
	s.mu.Unlock()
	if p != nil {
		return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID)
	}
	if s.task == nil {
		return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{
		ID:       r.ExecID,
		Terminal: r.Terminal,
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
		Spec:     r.Spec,
	})
	if err != nil {
		return nil, err
	}
	s.mu.Lock()
	s.processes[r.ExecID] = process
	s.mu.Unlock()
	return empty, nil
}

// ResizePty resizes the terminal of a process.
func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*types.Empty, error) {
	resp, err := s.resizePty(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) resizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*types.Empty, error) {
	log.L.Debugf("ResizePty, id: %s, execID: %s, dimension: %dx%d", r.ID, r.ExecID, r.Height, r.Width)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	ws := console.WinSize{
		Width:  uint16(r.Width),
		Height: uint16(r.Height),
	}
	if err := p.Resize(ws); err != nil {
		return nil, err
	}
	return empty, nil
}

// State returns runtime state information for a process.
func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) {
	resp, err := s.state(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) state(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) {
	log.L.Debugf("State, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		log.L.Debugf("State failed to find process: %v", err)
		return nil, err
	}
	st, err := p.Status(ctx)
	if err != nil {
		log.L.Debugf("State failed: %v", err)
		return nil, err
	}
	status := task.StatusUnknown
	switch st {
	case "created":
		status = task.StatusCreated
	case "running":
		status = task.StatusRunning
	case "stopped":
		status = task.StatusStopped
	}
	sio := p.Stdio()
	res := &taskAPI.StateResponse{
		ID:         p.ID(),
		Bundle:     s.bundle,
		Pid:        uint32(p.Pid()),
		Status:     status,
		Stdin:      sio.Stdin,
		Stdout:     sio.Stdout,
		Stderr:     sio.Stderr,
		Terminal:   sio.Terminal,
		ExitStatus: uint32(p.ExitStatus()),
		ExitedAt:   p.ExitedAt(),
	}
	log.L.Debugf("State succeeded, response: %+v", res)
	return res, nil
}

// Pause the container.
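// The request is forwarded to the runsc runtime for the sandbox; the shim
// keeps no additional pause state.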
func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Empty, error) {
	resp, err := s.pause(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Empty, error) {
	log.L.Debugf("Pause, id: %s", r.ID)
	if s.task == nil {
		log.L.Debugf("Pause error, id: %s: container not created", r.ID)
		return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	err := s.task.Runtime().Pause(ctx, r.ID)
	if err != nil {
		return nil, err
	}
	return empty, nil
}

// Resume the container.
func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types.Empty, error) {
	resp, err := s.resume(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types.Empty, error) {
	log.L.Debugf("Resume, id: %s", r.ID)
	if s.task == nil {
		log.L.Debugf("Resume error, id: %s: container not created", r.ID)
		return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	err := s.task.Runtime().Resume(ctx, r.ID)
	if err != nil {
		return nil, err
	}
	return empty, nil
}

// Kill a process with the provided signal.
func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empty, error) {
	resp, err := s.kill(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empty, error) {
	log.L.Debugf("Kill, id: %s, execID: %s, signal: %d, all: %t", r.ID, r.ExecID, r.Signal, r.All)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if err := p.Kill(ctx, r.Signal, r.All); err != nil {
		log.L.Debugf("Kill failed: %v", err)
		return nil, err
	}
	log.L.Debugf("Kill succeeded")
	return empty, nil
}

// Pids returns all pids inside the container.
func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) {
	resp, err := s.pids(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) {
	log.L.Debugf("Pids, id: %s", r.ID)

	pids, err := s.getContainerPids(ctx, r.ID)
	if err != nil {
		return nil, err
	}
	var processes []*task.ProcessInfo
	for _, pid := range pids {
		pInfo := task.ProcessInfo{
			Pid: pid,
		}
		for _, p := range s.processes {
			if p.Pid() == int(pid) {
				d := &runctypes.ProcessDetails{
					ExecID: p.ID(),
				}
				a, err := typeurl.MarshalAny(d)
				if err != nil {
					return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err)
				}
				pInfo.Info = a
				break
			}
		}
		processes = append(processes, &pInfo)
	}
	return &taskAPI.PidsResponse{
		Processes: processes,
	}, nil
}

// CloseIO closes the I/O context of a process.
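// Only the process's stdin is closed; stdout and stderr are left untouched.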
func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*types.Empty, error) {
	resp, err := s.closeIO(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) closeIO(ctx context.Context, r *taskAPI.CloseIORequest) (*types.Empty, error) {
	log.L.Debugf("CloseIO, id: %s, execID: %s, stdin: %t", r.ID, r.ExecID, r.Stdin)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if stdin := p.Stdin(); stdin != nil {
		if err := stdin.Close(); err != nil {
			return nil, fmt.Errorf("close stdin: %w", err)
		}
	}
	return empty, nil
}

// Checkpoint checkpoints the container.
func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) {
	log.L.Debugf("Checkpoint, id: %s", r.ID)
	return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}

// Connect returns shim information such as the shim's pid.
func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) {
	resp, err := s.connect(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) {
	log.L.Debugf("Connect, id: %s", r.ID)

	var pid int
	if s.task != nil {
		pid = s.task.Pid()
	}
	return &taskAPI.ConnectResponse{
		ShimPid: uint32(os.Getpid()),
		TaskPid: uint32(pid),
	}, nil
}

func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*types.Empty, error) {
	resp, err := s.shutdown(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*types.Empty, error) {
	log.L.Debugf("Shutdown, id: %s", r.ID)
	s.cancel()
	if s.shimAddress != "" {
		_ = shim.RemoveSocket(s.shimAddress)
	}
	os.Exit(0)
	panic("Should not get here")
}

func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
	resp, err := s.stats(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
	log.L.Debugf("Stats, id: %s", r.ID)
	if s.task == nil {
		log.L.Debugf("Stats error, id: %s: container not created", r.ID)
		return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	stats, err := s.task.Stats(ctx, s.id)
	if err != nil {
		log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
		return nil, err
	}

	// gvisor currently (as of 2020-03-03) only returns the total memory
	// usage and current PID value[0]. However, we copy the common fields here
	// so that future updates will propagate correct information. We're
	// using the cgroups.Metrics structure so we're returning the same type
	// as runc.
	//
	// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
	metrics := &cgroupsstats.Metrics{
		CPU: &cgroupsstats.CPUStat{
			Usage: &cgroupsstats.CPUUsage{
				Total:  stats.Cpu.Usage.Total,
				Kernel: stats.Cpu.Usage.Kernel,
				User:   stats.Cpu.Usage.User,
				PerCPU: stats.Cpu.Usage.Percpu,
			},
			Throttling: &cgroupsstats.Throttle{
				Periods:          stats.Cpu.Throttling.Periods,
				ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods,
				ThrottledTime:    stats.Cpu.Throttling.ThrottledTime,
			},
		},
		Memory: &cgroupsstats.MemoryStat{
			Cache: stats.Memory.Cache,
			Usage: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.Usage.Limit,
				Usage:   stats.Memory.Usage.Usage,
				Max:     stats.Memory.Usage.Max,
				Failcnt: stats.Memory.Usage.Failcnt,
			},
			Swap: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.Swap.Limit,
				Usage:   stats.Memory.Swap.Usage,
				Max:     stats.Memory.Swap.Max,
				Failcnt: stats.Memory.Swap.Failcnt,
			},
			Kernel: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.Kernel.Limit,
				Usage:   stats.Memory.Kernel.Usage,
				Max:     stats.Memory.Kernel.Max,
				Failcnt: stats.Memory.Kernel.Failcnt,
			},
			KernelTCP: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.KernelTCP.Limit,
				Usage:   stats.Memory.KernelTCP.Usage,
				Max:     stats.Memory.KernelTCP.Max,
				Failcnt: stats.Memory.KernelTCP.Failcnt,
			},
		},
		Pids: &cgroupsstats.PidsStat{
			Current: stats.Pids.Current,
			Limit:   stats.Pids.Limit,
		},
	}
	data, err := typeurl.MarshalAny(metrics)
	if err != nil {
		log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
		return nil, err
	}
	log.L.Debugf("Stats success, id: %s: %+v", r.ID, data)
	return &taskAPI.StatsResponse{
		Stats: data,
	}, nil
}

// Update updates a running container.
func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) {
	return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}

// Wait waits for a process to exit.
func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) {
	resp, err := s.wait(ctx, r)
	return resp, errdefs.ToGRPC(err)
}

func (s *service) wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) {
	log.L.Debugf("Wait, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		log.L.Debugf("Wait failed to find process: %v", err)
		return nil, err
	}
	p.Wait()

	res := &taskAPI.WaitResponse{
		ExitStatus: uint32(p.ExitStatus()),
		ExitedAt:   p.ExitedAt(),
	}
	log.L.Debugf("Wait succeeded, response: %+v", res)
	return res, nil
}

func (s *service) processExits(ctx context.Context) {
	for e := range s.ec {
		s.checkProcesses(ctx, e)
	}
}

func (s *service) checkProcesses(ctx context.Context, e proc.Exit) {
	// TODO(random-liu): Add `shouldKillAll` logic if container pid
	// namespace is supported.
	for _, p := range s.allProcesses() {
		if p.ID() == e.ID {
			if ip, ok := p.(*proc.Init); ok {
				// Ensure all children are killed.
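				// KillAll runs before the exit event is published below, so
				// containerd observes the exit only after every process in the
				// container has been signaled.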
				log.L.Debugf("Container init process exited, killing all container processes")
				ip.KillAll(ctx)
			}
			p.SetExited(e.Status)
			s.events <- &events.TaskExit{
				ContainerID: s.id,
				ID:          p.ID(),
				Pid:         uint32(p.Pid()),
				ExitStatus:  uint32(e.Status),
				ExitedAt:    p.ExitedAt(),
			}
			return
		}
	}
}

func (s *service) allProcesses() (o []process.Process) {
	s.mu.Lock()
	defer s.mu.Unlock()
	for _, p := range s.processes {
		o = append(o, p)
	}
	if s.task != nil {
		o = append(o, s.task)
	}
	return o
}

func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
	s.mu.Lock()
	p := s.task
	s.mu.Unlock()
	if p == nil {
		return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition)
	}
	ps, err := p.Runtime().Ps(ctx, id)
	if err != nil {
		return nil, err
	}
	pids := make([]uint32, 0, len(ps))
	for _, pid := range ps {
		pids = append(pids, uint32(pid))
	}
	return pids, nil
}

func (s *service) forward(ctx context.Context, publisher shim.Publisher) {
	for e := range s.events {
		err := publisher.Publish(ctx, getTopic(e), e)
		if err != nil {
			// Should not happen.
			panic(fmt.Errorf("post event: %w", err))
		}
	}
}

func (s *service) getProcess(execID string) (process.Process, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if execID == "" {
		if s.task == nil {
			return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
		}
		return s.task, nil
	}

	p := s.processes[execID]
	if p == nil {
		return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID)
	}
	return p, nil
}

func getTopic(e any) string {
	switch e.(type) {
	case *events.TaskCreate:
		return runtime.TaskCreateEventTopic
	case *events.TaskStart:
		return runtime.TaskStartEventTopic
	case *events.TaskOOM:
		return runtime.TaskOOMEventTopic
	case *events.TaskExit:
		return runtime.TaskExitEventTopic
	case *events.TaskDelete:
		return runtime.TaskDeleteEventTopic
	case *events.TaskExecAdded:
		return runtime.TaskExecAddedEventTopic
	case *events.TaskExecStarted:
		return runtime.TaskExecStartedEventTopic
	default:
		log.L.Infof("no topic for type %#v", e)
	}
	return runtime.TaskUnknownTopic
}

func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.CreateConfig, options *options, rootfs string) (*proc.Init, error) {
	spec, err := utils.ReadSpec(r.Bundle)
	if err != nil {
		return nil, fmt.Errorf("read oci spec: %w", err)
	}

	updated, err := utils.UpdateVolumeAnnotations(spec)
	if err != nil {
		return nil, fmt.Errorf("update volume annotations: %w", err)
	}
	updated = setPodCgroup(spec) || updated

	if updated {
		if err := utils.WriteSpec(r.Bundle, spec); err != nil {
			return nil, err
		}
	}

	runsc.FormatRunscPaths(r.ID, options.RunscConfig)
	runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig, spec)
	p := proc.New(r.ID, runtime, stdio.Stdio{
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
		Terminal: r.Terminal,
	})
	p.Bundle = r.Bundle
	p.Platform = platform
	p.Rootfs = rootfs
	p.WorkDir = workDir
	p.IoUID = int(options.IoUID)
	p.IoGID = int(options.IoGID)
	p.Sandbox = specutils.SpecContainerType(spec) == specutils.ContainerTypeSandbox
	p.UserLog = utils.UserLogPath(spec)
	p.Monitor = reaper.Default
	return p, nil
}

// setPodCgroup searches for the pod cgroup path inside the container's cgroup
// path. If found, it's set as an annotation in the spec. This is done so that
// the sandbox joins the pod cgroup. Otherwise, the sandbox would join the pause
// container cgroup. Returns true if the spec was modified. Ex.:
// /kubepods/burstable/pod123/container123 => /kubepods/burstable/pod123
func setPodCgroup(spec *specs.Spec) bool {
	if !utils.IsSandbox(spec) {
		return false
	}
	if spec.Linux == nil || len(spec.Linux.CgroupsPath) == 0 {
		return false
	}

	// Search backwards for the pod cgroup path to make the sandbox use it,
	// instead of the pause container's cgroup.
	parts := strings.Split(spec.Linux.CgroupsPath, string(filepath.Separator))
	for i := len(parts) - 1; i >= 0; i-- {
		if strings.HasPrefix(parts[i], "pod") {
			var path string
			for j := 0; j <= i; j++ {
				path = filepath.Join(path, parts[j])
			}
			// Add back the initial '/' that may have been lost above.
			if filepath.IsAbs(spec.Linux.CgroupsPath) {
				path = string(filepath.Separator) + path
			}
			if spec.Linux.CgroupsPath == path {
				return false
			}
			if spec.Annotations == nil {
				spec.Annotations = make(map[string]string)
			}
			spec.Annotations[cgroupParentAnnotation] = path
			return true
		}
	}
	return false
}