github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/shim/service.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package shim implements Containerd Shim v2 interface.
package shim

import (
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/BurntSushi/toml"
	"github.com/containerd/cgroups"
	cgroupsstats "github.com/containerd/cgroups/stats/v1"
	"github.com/containerd/console"
	"github.com/containerd/containerd/api/events"
	"github.com/containerd/containerd/api/types/task"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/log"
	"github.com/containerd/containerd/mount"
	"github.com/containerd/containerd/namespaces"
	"github.com/containerd/containerd/pkg/process"
	"github.com/containerd/containerd/pkg/stdio"
	"github.com/containerd/containerd/runtime"
	"github.com/containerd/containerd/runtime/linux/runctypes"
	"github.com/containerd/containerd/runtime/v2/shim"
	taskAPI "github.com/containerd/containerd/runtime/v2/task"
	"github.com/containerd/containerd/sys/reaper"
	"github.com/containerd/typeurl"
	"github.com/gogo/protobuf/types"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"

	"github.com/SagerNet/gvisor/pkg/cleanup"
	"github.com/SagerNet/gvisor/pkg/shim/proc"
	"github.com/SagerNet/gvisor/pkg/shim/runsc"
	"github.com/SagerNet/gvisor/pkg/shim/runtimeoptions"
	"github.com/SagerNet/gvisor/pkg/shim/utils"
	"github.com/SagerNet/gvisor/runsc/specutils"
)

var (
	empty   = &types.Empty{}
	bufPool = sync.Pool{
		New: func() interface{} {
			buffer := make([]byte, 32<<10)
			return &buffer
		},
	}
)

var _ = (taskAPI.TaskService)(&service{})

const (
	// configFile is the default config file name. For containerd 1.2,
	// we assume that a config.toml should exist in the runtime root.
	configFile = "config.toml"

	// shimAddressPath is the relative path to a file that contains the address
	// to the shim UDS. See service.shimAddress.
	shimAddressPath = "address"
)

// New returns a new shim service that can be used via GRPC.
func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) {
	var opts shim.Opts
	if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil {
		opts = ctxOpts.(shim.Opts)
	}

	ep, err := newOOMEpoller(publisher)
	if err != nil {
		return nil, err
	}
	go ep.run(ctx)
	s := &service{
		id:             id,
		processes:      make(map[string]process.Process),
		events:         make(chan interface{}, 128),
		ec:             proc.ExitCh,
		oomPoller:      ep,
		cancel:         cancel,
		genericOptions: opts,
	}
	go s.processExits(ctx)
	runsc.Monitor = &runsc.LogMonitor{Next: reaper.Default}
	if err := s.initPlatform(); err != nil {
		cancel()
		return nil, fmt.Errorf("failed to initialize platform behavior: %w", err)
	}
	go s.forward(ctx, publisher)

	if address, err := shim.ReadAddress(shimAddressPath); err == nil {
		s.shimAddress = address
	}

	return s, nil
}

// service is the shim implementation of a remote shim over GRPC. It runs in 2
// different modes:
//   1. Service: process runs for the lifetime of the container and receives
//      calls described in shimapi.TaskService interface.
//   2. Tool: process is short lived and runs only to perform the requested
//      operations and then exits. It implements the direct functions in
//      shim.Shim interface.
//
// When the service is running, it saves a json file with state information so
// that commands sent to the tool can load the state and perform the operation.
type service struct {
	mu sync.Mutex

	// id is the container ID.
	id string

	// bundle is a path provided by the caller on container creation. It is
	// stored because it's needed in commands that don't receive the bundle in
	// the request.
	bundle string

	// task is the main process that is running the container.
	task *proc.Init

	// processes maps ExecId to processes running through exec.
	processes map[string]process.Process

	events chan interface{}

	// platform handles operations related to the console.
	platform stdio.Platform

	// genericOptions are options that come from the shim interface and are common
	// to all shims.
	genericOptions shim.Opts

	// opts are configuration options specific for this shim.
	opts options

	// ec gets notified whenever the container init process or an exec'd process
	// exits from inside the sandbox.
	ec chan proc.Exit

	// oomPoller monitors the sandbox's cgroup for OOM notifications.
	oomPoller *epoller

	// cancel is a function that needs to be called before the shim stops. The
	// function is provided by the caller to New().
	cancel func()

	// shimAddress is the location of the UDS used to communicate to containerd.
	shimAddress string
}

func (s *service) newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	self, err := os.Executable()
	if err != nil {
		return nil, err
	}
	cwd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	args := []string{
		"-namespace", ns,
		"-address", containerdAddress,
		"-publish-binary", containerdBinary,
	}
	if s.genericOptions.Debug {
		args = append(args, "-debug")
	}
	cmd := exec.Command(self, args...)
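	// Note (editorial): the command re-executes this same shim binary ("self"
	// above); StartShim starts it as the long-lived shim server and returns
	// its socket address to containerd.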
	cmd.Dir = cwd
	cmd.Env = append(os.Environ(), "GOMAXPROCS=2")
	cmd.SysProcAttr = &unix.SysProcAttr{
		Setpgid: true,
	}
	return cmd, nil
}

func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (string, error) {
	log.L.Debugf("StartShim, id: %s, binary: %q, address: %q", id, containerdBinary, containerdAddress)

	cmd, err := s.newCommand(ctx, containerdBinary, containerdAddress)
	if err != nil {
		return "", err
	}
	address, err := shim.SocketAddress(ctx, containerdAddress, id)
	if err != nil {
		return "", err
	}
	socket, err := shim.NewSocket(address)
	if err != nil {
		// The only time this would happen is if there is a bug and the socket
		// was not cleaned up in the cleanup method of the shim, or if we are
		// using the grouping functionality where the new process should be run
		// with the same shim as an existing container.
		if !shim.SocketEaddrinuse(err) {
			return "", fmt.Errorf("create new shim socket: %w", err)
		}
		if shim.CanConnect(address) {
			if err := shim.WriteAddress(shimAddressPath, address); err != nil {
				return "", fmt.Errorf("write existing socket for shim: %w", err)
			}
			return address, nil
		}
		if err := shim.RemoveSocket(address); err != nil {
			return "", fmt.Errorf("remove pre-existing socket: %w", err)
		}
		if socket, err = shim.NewSocket(address); err != nil {
			return "", fmt.Errorf("try create new shim socket 2x: %w", err)
		}
	}
	cu := cleanup.Make(func() {
		socket.Close()
		_ = shim.RemoveSocket(address)
	})
	defer cu.Clean()

	f, err := socket.File()
	if err != nil {
		return "", err
	}

	cmd.ExtraFiles = append(cmd.ExtraFiles, f)

	log.L.Debugf("Executing: %q %s", cmd.Path, cmd.Args)
	if err := cmd.Start(); err != nil {
		f.Close()
		return "", err
	}
	cu.Add(func() { cmd.Process.Kill() })

	// Make sure to wait after start.
	go cmd.Wait()
	if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil {
		return "", err
	}
	if err := shim.WriteAddress(shimAddressPath, address); err != nil {
		return "", err
	}
	if err := shim.SetScore(cmd.Process.Pid); err != nil {
		return "", fmt.Errorf("failed to set OOM Score on shim: %w", err)
	}
	cu.Release()
	return address, nil
}

// Cleanup is called from another process (which needs to reload state) to stop
// the container and undo all operations done in Create().
func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) {
	log.L.Debugf("Cleanup")

	path, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	var st state
	if err := st.load(path); err != nil {
		return nil, err
	}
	r := proc.NewRunsc(s.opts.Root, path, ns, st.Options.BinaryName, nil)

	if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{
		Force: true,
	}); err != nil {
		log.L.Infof("failed to remove runc container: %v", err)
	}
	if err := mount.UnmountAll(st.Rootfs, 0); err != nil {
		log.L.Infof("failed to cleanup rootfs mount: %v", err)
	}
	return &taskAPI.DeleteResponse{
		ExitedAt:   time.Now(),
		ExitStatus: 128 + uint32(unix.SIGKILL),
	}, nil
}

// Create creates a new initial process and container with the underlying OCI
// runtime.
func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Save the main task id and bundle to the shim for additional requests.
	s.id = r.ID
	s.bundle = r.Bundle

	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, fmt.Errorf("create namespace: %w", err)
	}

	// Read from root for now.
	if r.Options != nil {
		v, err := typeurl.UnmarshalAny(r.Options)
		if err != nil {
			return nil, err
		}
		var path string
		switch o := v.(type) {
		case *runctypes.CreateOptions: // containerd 1.2.x
			s.opts.IoUID = o.IoUid
			s.opts.IoGID = o.IoGid
			s.opts.ShimCgroup = o.ShimCgroup
		case *runctypes.RuncOptions: // containerd 1.2.x
			root := proc.RunscRoot
			if o.RuntimeRoot != "" {
				root = o.RuntimeRoot
			}

			s.opts.BinaryName = o.Runtime

			path = filepath.Join(root, configFile)
			if _, err := os.Stat(path); err != nil {
				if !os.IsNotExist(err) {
					return nil, fmt.Errorf("stat config file %q: %w", path, err)
				}
				// A config file in runtime root is not required.
				path = ""
			}
		case *runtimeoptions.Options: // containerd 1.3.x+
			if o.ConfigPath == "" {
				break
			}
			if o.TypeUrl != optionsType {
				return nil, fmt.Errorf("unsupported option type %q", o.TypeUrl)
			}
			path = o.ConfigPath
		default:
			return nil, fmt.Errorf("unsupported option type %q", r.Options.TypeUrl)
		}
		if path != "" {
			if _, err = toml.DecodeFile(path, &s.opts); err != nil {
				return nil, fmt.Errorf("decode config file %q: %w", path, err)
			}
		}
	}

	if len(s.opts.LogLevel) != 0 {
		lvl, err := logrus.ParseLevel(s.opts.LogLevel)
		if err != nil {
			return nil, err
		}
		logrus.SetLevel(lvl)
	}
	if len(s.opts.LogPath) != 0 {
		logPath := runsc.FormatShimLogPath(s.opts.LogPath, s.id)
		if err := os.MkdirAll(filepath.Dir(logPath), 0777); err != nil {
			return nil, fmt.Errorf("failed to create log dir: %w", err)
		}
		logFile, err := os.Create(logPath)
		if err != nil {
			return nil, fmt.Errorf("failed to create log file: %w", err)
		}
		log.L.Debugf("Starting mirror log at %q", logPath)
		std := logrus.StandardLogger()
		std.SetOutput(io.MultiWriter(std.Out, logFile))

		log.L.Debugf("Create shim")
		log.L.Debugf("***************************")
		log.L.Debugf("Args: %s", os.Args)
		log.L.Debugf("PID: %d", os.Getpid())
		log.L.Debugf("ID: %s", s.id)
		log.L.Debugf("Options: %+v", s.opts)
		log.L.Debugf("Bundle: %s", r.Bundle)
		log.L.Debugf("Terminal: %t", r.Terminal)
		log.L.Debugf("stdin: %s", r.Stdin)
		log.L.Debugf("stdout: %s", r.Stdout)
		log.L.Debugf("stderr: %s", r.Stderr)
		log.L.Debugf("***************************")
		if log.L.Logger.IsLevelEnabled(logrus.DebugLevel) {
			setDebugSigHandler()
		}
	}

	// Save state before any action is taken to ensure Cleanup() will have all
	// the information it needs to undo the operations.
	st := state{
		Rootfs:  filepath.Join(r.Bundle, "rootfs"),
		Options: s.opts,
	}
	if err := st.save(r.Bundle); err != nil {
		return nil, err
	}

	if err := os.Mkdir(st.Rootfs, 0711); err != nil && !os.IsExist(err) {
		return nil, err
	}

	// Convert from types.Mount to proc.Mount.
	var mounts []proc.Mount
	for _, m := range r.Rootfs {
		mounts = append(mounts, proc.Mount{
			Type:    m.Type,
			Source:  m.Source,
			Target:  m.Target,
			Options: m.Options,
		})
	}

	// Cleans up all mounts in case of failure.
	cu := cleanup.Make(func() {
		if err := mount.UnmountAll(st.Rootfs, 0); err != nil {
			log.L.Infof("failed to cleanup rootfs mount: %v", err)
		}
	})
	defer cu.Clean()
	for _, rm := range mounts {
		m := &mount.Mount{
			Type:    rm.Type,
			Source:  rm.Source,
			Options: rm.Options,
		}
		if err := m.Mount(st.Rootfs); err != nil {
			return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err)
		}
	}

	config := &proc.CreateConfig{
		ID:       r.ID,
		Bundle:   r.Bundle,
		Runtime:  s.opts.BinaryName,
		Rootfs:   mounts,
		Terminal: r.Terminal,
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
	}
	process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs)
	if err != nil {
		return nil, utils.ErrToGRPC(err)
	}
	if err := process.Create(ctx, config); err != nil {
		return nil, utils.ErrToGRPC(err)
	}

	// Set up OOM notification on the sandbox's cgroup. This is done on
	// sandbox create since the sandbox process will be created here.
	pid := process.Pid()
	if pid > 0 {
		cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid))
		if err != nil {
			return nil, fmt.Errorf("loading cgroup for %d: %w", pid, err)
		}
		if err := s.oomPoller.add(s.id, cg); err != nil {
			return nil, fmt.Errorf("add cg to OOM monitor: %w", err)
		}
	}

	// Success
	cu.Release()
	s.task = process
	return &taskAPI.CreateTaskResponse{
		Pid: uint32(process.Pid()),
	}, nil
}

// Start starts a process.
func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) {
	log.L.Debugf("Start, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if err := p.Start(ctx); err != nil {
		return nil, err
	}
	// TODO: Set the cgroup and oom notifications on restore.
	// https://github.com/google/gvisor-containerd-shim/issues/58
	return &taskAPI.StartResponse{
		Pid: uint32(p.Pid()),
	}, nil
}

// Delete deletes the initial process and container.
func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) {
	log.L.Debugf("Delete, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if err := p.Delete(ctx); err != nil {
		return nil, err
	}
	if len(r.ExecID) != 0 {
		s.mu.Lock()
		delete(s.processes, r.ExecID)
		s.mu.Unlock()
	} else if s.platform != nil {
		s.platform.Close()
	}
	return &taskAPI.DeleteResponse{
		ExitStatus: uint32(p.ExitStatus()),
		ExitedAt:   p.ExitedAt(),
		Pid:        uint32(p.Pid()),
	}, nil
}

// Exec spawns an additional process inside the container.
func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*types.Empty, error) {
	log.L.Debugf("Exec, id: %s, execID: %s", r.ID, r.ExecID)

	s.mu.Lock()
	p := s.processes[r.ExecID]
	s.mu.Unlock()
	if p != nil {
		return nil, utils.ErrToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID)
	}
	if s.task == nil {
		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{
		ID:       r.ExecID,
		Terminal: r.Terminal,
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
		Spec:     r.Spec,
	})
	if err != nil {
		return nil, utils.ErrToGRPC(err)
	}
	s.mu.Lock()
	s.processes[r.ExecID] = process
	s.mu.Unlock()
	return empty, nil
}

// ResizePty resizes the terminal of a process.
func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*types.Empty, error) {
	log.L.Debugf("ResizePty, id: %s, execID: %s, dimension: %dx%d", r.ID, r.ExecID, r.Height, r.Width)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	ws := console.WinSize{
		Width:  uint16(r.Width),
		Height: uint16(r.Height),
	}
	if err := p.Resize(ws); err != nil {
		return nil, utils.ErrToGRPC(err)
	}
	return empty, nil
}

// State returns runtime state information for a process.
func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) {
	log.L.Debugf("State, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		log.L.Debugf("State failed to find process: %v", err)
		return nil, err
	}
	st, err := p.Status(ctx)
	if err != nil {
		log.L.Debugf("State failed: %v", err)
		return nil, err
	}
	status := task.StatusUnknown
	switch st {
	case "created":
		status = task.StatusCreated
	case "running":
		status = task.StatusRunning
	case "stopped":
		status = task.StatusStopped
	}
	sio := p.Stdio()
	res := &taskAPI.StateResponse{
		ID:         p.ID(),
		Bundle:     s.bundle,
		Pid:        uint32(p.Pid()),
		Status:     status,
		Stdin:      sio.Stdin,
		Stdout:     sio.Stdout,
		Stderr:     sio.Stderr,
		Terminal:   sio.Terminal,
		ExitStatus: uint32(p.ExitStatus()),
		ExitedAt:   p.ExitedAt(),
	}
	log.L.Debugf("State succeeded, response: %+v", res)
	return res, nil
}

// Pause the container.
func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Empty, error) {
	log.L.Debugf("Pause, id: %s", r.ID)
	if s.task == nil {
		log.L.Debugf("Pause error, id: %s: container not created", r.ID)
		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	err := s.task.Runtime().Pause(ctx, r.ID)
	if err != nil {
		return nil, err
	}
	return empty, nil
}

// Resume the container.
func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types.Empty, error) {
	log.L.Debugf("Resume, id: %s", r.ID)
	if s.task == nil {
		log.L.Debugf("Resume error, id: %s: container not created", r.ID)
		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	err := s.task.Runtime().Resume(ctx, r.ID)
	if err != nil {
		return nil, err
	}
	return empty, nil
}

// Kill a process with the provided signal.
func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empty, error) {
	log.L.Debugf("Kill, id: %s, execID: %s, signal: %d, all: %t", r.ID, r.ExecID, r.Signal, r.All)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if err := p.Kill(ctx, r.Signal, r.All); err != nil {
		log.L.Debugf("Kill failed: %v", err)
		return nil, utils.ErrToGRPC(err)
	}
	log.L.Debugf("Kill succeeded")
	return empty, nil
}

// Pids returns all pids inside the container.
func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) {
	log.L.Debugf("Pids, id: %s", r.ID)

	pids, err := s.getContainerPids(ctx, r.ID)
	if err != nil {
		return nil, utils.ErrToGRPC(err)
	}
	var processes []*task.ProcessInfo
	for _, pid := range pids {
		pInfo := task.ProcessInfo{
			Pid: pid,
		}
		for _, p := range s.processes {
			if p.Pid() == int(pid) {
				d := &runctypes.ProcessDetails{
					ExecID: p.ID(),
				}
				a, err := typeurl.MarshalAny(d)
				if err != nil {
					return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err)
				}
				pInfo.Info = a
				break
			}
		}
		processes = append(processes, &pInfo)
	}
	return &taskAPI.PidsResponse{
		Processes: processes,
	}, nil
}

// CloseIO closes the I/O context of a process.
func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*types.Empty, error) {
	log.L.Debugf("CloseIO, id: %s, execID: %s, stdin: %t", r.ID, r.ExecID, r.Stdin)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		return nil, err
	}
	if stdin := p.Stdin(); stdin != nil {
		if err := stdin.Close(); err != nil {
			return nil, fmt.Errorf("close stdin: %w", err)
		}
	}
	return empty, nil
}

// Checkpoint checkpoints the container.
func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) {
	log.L.Debugf("Checkpoint, id: %s", r.ID)
	return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented)
}

// Connect returns shim information such as the shim's pid.
func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) {
	log.L.Debugf("Connect, id: %s", r.ID)

	var pid int
	if s.task != nil {
		pid = s.task.Pid()
	}
	return &taskAPI.ConnectResponse{
		ShimPid: uint32(os.Getpid()),
		TaskPid: uint32(pid),
	}, nil
}

func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*types.Empty, error) {
	log.L.Debugf("Shutdown, id: %s", r.ID)
	s.cancel()
	if s.shimAddress != "" {
		_ = shim.RemoveSocket(s.shimAddress)
	}
	os.Exit(0)
	panic("Should not get here")
}

func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
	log.L.Debugf("Stats, id: %s", r.ID)
	if s.task == nil {
		log.L.Debugf("Stats error, id: %s: container not created", r.ID)
		return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
	}
	stats, err := s.task.Stats(ctx, s.id)
	if err != nil {
		log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
		return nil, err
	}

	// gvisor currently (as of 2020-03-03) only returns the total memory
	// usage and current PID value[0]. However, we copy the common fields here
	// so that future updates will propagate correct information.
	// We're using the cgroups.Metrics structure so we're returning the same type
	// as runc.
	//
	// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
	metrics := &cgroupsstats.Metrics{
		CPU: &cgroupsstats.CPUStat{
			Usage: &cgroupsstats.CPUUsage{
				Total:  stats.Cpu.Usage.Total,
				Kernel: stats.Cpu.Usage.Kernel,
				User:   stats.Cpu.Usage.User,
				PerCPU: stats.Cpu.Usage.Percpu,
			},
			Throttling: &cgroupsstats.Throttle{
				Periods:          stats.Cpu.Throttling.Periods,
				ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods,
				ThrottledTime:    stats.Cpu.Throttling.ThrottledTime,
			},
		},
		Memory: &cgroupsstats.MemoryStat{
			Cache: stats.Memory.Cache,
			Usage: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.Usage.Limit,
				Usage:   stats.Memory.Usage.Usage,
				Max:     stats.Memory.Usage.Max,
				Failcnt: stats.Memory.Usage.Failcnt,
			},
			Swap: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.Swap.Limit,
				Usage:   stats.Memory.Swap.Usage,
				Max:     stats.Memory.Swap.Max,
				Failcnt: stats.Memory.Swap.Failcnt,
			},
			Kernel: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.Kernel.Limit,
				Usage:   stats.Memory.Kernel.Usage,
				Max:     stats.Memory.Kernel.Max,
				Failcnt: stats.Memory.Kernel.Failcnt,
			},
			KernelTCP: &cgroupsstats.MemoryEntry{
				Limit:   stats.Memory.KernelTCP.Limit,
				Usage:   stats.Memory.KernelTCP.Usage,
				Max:     stats.Memory.KernelTCP.Max,
				Failcnt: stats.Memory.KernelTCP.Failcnt,
			},
		},
		Pids: &cgroupsstats.PidsStat{
			Current: stats.Pids.Current,
			Limit:   stats.Pids.Limit,
		},
	}
	data, err := typeurl.MarshalAny(metrics)
	if err != nil {
		log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
		return nil, err
	}
	log.L.Debugf("Stats success, id: %s: %+v", r.ID, data)
	return &taskAPI.StatsResponse{
		Stats: data,
	}, nil
}

// Update updates a running container.
func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) {
	return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented)
}

// Wait waits for a process to exit.
func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) {
	log.L.Debugf("Wait, id: %s, execID: %s", r.ID, r.ExecID)

	p, err := s.getProcess(r.ExecID)
	if err != nil {
		log.L.Debugf("Wait failed to find process: %v", err)
		return nil, err
	}
	p.Wait()

	res := &taskAPI.WaitResponse{
		ExitStatus: uint32(p.ExitStatus()),
		ExitedAt:   p.ExitedAt(),
	}
	log.L.Debugf("Wait succeeded, response: %+v", res)
	return res, nil
}

func (s *service) processExits(ctx context.Context) {
	for e := range s.ec {
		s.checkProcesses(ctx, e)
	}
}

func (s *service) checkProcesses(ctx context.Context, e proc.Exit) {
	// TODO(random-liu): Add `shouldKillAll` logic if container pid
	// namespace is supported.
	for _, p := range s.allProcesses() {
		if p.ID() == e.ID {
			if ip, ok := p.(*proc.Init); ok {
				// Ensure all children are killed.
				log.L.Debugf("Container init process exited, killing all container processes")
				ip.KillAll(ctx)
			}
			p.SetExited(e.Status)
			s.events <- &events.TaskExit{
				ContainerID: s.id,
				ID:          p.ID(),
				Pid:         uint32(p.Pid()),
				ExitStatus:  uint32(e.Status),
				ExitedAt:    p.ExitedAt(),
			}
			return
		}
	}
}

func (s *service) allProcesses() (o []process.Process) {
	s.mu.Lock()
	defer s.mu.Unlock()
	for _, p := range s.processes {
		o = append(o, p)
	}
	if s.task != nil {
		o = append(o, s.task)
	}
	return o
}

func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
	s.mu.Lock()
	p := s.task
	s.mu.Unlock()
	if p == nil {
		return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition)
	}
	ps, err := p.Runtime().Ps(ctx, id)
	if err != nil {
		return nil, err
	}
	pids := make([]uint32, 0, len(ps))
	for _, pid := range ps {
		pids = append(pids, uint32(pid))
	}
	return pids, nil
}

func (s *service) forward(ctx context.Context, publisher shim.Publisher) {
	for e := range s.events {
		err := publisher.Publish(ctx, getTopic(e), e)
		if err != nil {
			// Should not happen.
			panic(fmt.Errorf("post event: %w", err))
		}
	}
}

func (s *service) getProcess(execID string) (process.Process, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if execID == "" {
		if s.task == nil {
			return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
		}
		return s.task, nil
	}

	p := s.processes[execID]
	if p == nil {
		return nil, utils.ErrToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID)
	}
	return p, nil
}

func getTopic(e interface{}) string {
	switch e.(type) {
	case *events.TaskCreate:
		return runtime.TaskCreateEventTopic
	case *events.TaskStart:
		return runtime.TaskStartEventTopic
	case *events.TaskOOM:
		return runtime.TaskOOMEventTopic
	case *events.TaskExit:
		return runtime.TaskExitEventTopic
	case *events.TaskDelete:
		return runtime.TaskDeleteEventTopic
	case *events.TaskExecAdded:
		return runtime.TaskExecAddedEventTopic
	case *events.TaskExecStarted:
		return runtime.TaskExecStartedEventTopic
	default:
		log.L.Infof("no topic for type %#v", e)
	}
	return runtime.TaskUnknownTopic
}

func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.CreateConfig, options *options, rootfs string) (*proc.Init, error) {
	spec, err := utils.ReadSpec(r.Bundle)
	if err != nil {
		return nil, fmt.Errorf("read oci spec: %w", err)
	}

	updated, err := utils.UpdateVolumeAnnotations(spec)
	if err != nil {
		return nil, fmt.Errorf("update volume annotations: %w", err)
	}
	updated = updateCgroup(spec) || updated

	if updated {
		if err := utils.WriteSpec(r.Bundle, spec); err != nil {
			return nil, err
		}
	}

	runsc.FormatRunscLogPath(r.ID, options.RunscConfig)
	runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig)
	p := proc.New(r.ID, runtime, stdio.Stdio{
		Stdin:    r.Stdin,
		Stdout:   r.Stdout,
		Stderr:   r.Stderr,
		Terminal: r.Terminal,
	})
	p.Bundle = r.Bundle
	p.Platform = platform
	p.Rootfs = rootfs
	p.WorkDir = workDir
	p.IoUID = int(options.IoUID)
	p.IoGID = int(options.IoGID)
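	// Editorial note: p.Sandbox (set below) marks whether this init process is
	// the pod's sandbox (pause) container, as determined from the spec
	// annotations inspected by specutils.SpecContainerType.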
	p.Sandbox = specutils.SpecContainerType(spec) == specutils.ContainerTypeSandbox
	p.UserLog = utils.UserLogPath(spec)
	p.Monitor = reaper.Default
	return p, nil
}

// updateCgroup updates cgroup path for the sandbox to make the sandbox join
// the pod cgroup and not the pause container cgroup. Returns true if the spec
// was modified. Ex.:
//   /kubepods/burstable/pod123/abc => /kubepods/burstable/pod123
func updateCgroup(spec *specs.Spec) bool {
	if !utils.IsSandbox(spec) {
		return false
	}
	if spec.Linux == nil || len(spec.Linux.CgroupsPath) == 0 {
		return false
	}

	// Search backwards for the pod cgroup path to make the sandbox use it,
	// instead of the pause container's cgroup.
	parts := strings.Split(spec.Linux.CgroupsPath, string(filepath.Separator))
	for i := len(parts) - 1; i >= 0; i-- {
		if strings.HasPrefix(parts[i], "pod") {
			var path string
			for j := 0; j <= i; j++ {
				path = filepath.Join(path, parts[j])
			}
			// Add back the initial '/' that may have been lost above.
			if filepath.IsAbs(spec.Linux.CgroupsPath) {
				path = string(filepath.Separator) + path
			}
			if spec.Linux.CgroupsPath == path {
				return false
			}
			spec.Linux.CgroupsPath = path
			return true
		}
	}
	return false
}