github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/shim/proc/init.go (about) 1 // Copyright 2018 The containerd Authors. 2 // Copyright 2018 The gVisor Authors. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // https://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package proc 17 18 import ( 19 "context" 20 "encoding/json" 21 "fmt" 22 "io" 23 "path/filepath" 24 "strings" 25 "sync" 26 "time" 27 28 "github.com/containerd/console" 29 30 "github.com/containerd/containerd/errdefs" 31 "github.com/containerd/containerd/log" 32 "github.com/containerd/containerd/mount" 33 "github.com/containerd/containerd/pkg/process" 34 "github.com/containerd/containerd/pkg/stdio" 35 36 "github.com/MerlinKodo/gvisor/pkg/shim/runsc" 37 "github.com/MerlinKodo/gvisor/pkg/shim/utils" 38 "github.com/containerd/fifo" 39 runc "github.com/containerd/go-runc" 40 specs "github.com/opencontainers/runtime-spec/specs-go" 41 "golang.org/x/sys/unix" 42 ) 43 44 const statusStopped = "stopped" 45 46 // Init represents an initial process for a container. 47 type Init struct { 48 wg sync.WaitGroup 49 initState initState 50 51 // mu is used to ensure that `Start()` and `Exited()` calls return in 52 // the right order when invoked in separate go routines. This is the 53 // case within the shim implementation as it makes use of the reaper 54 // interface. 55 mu sync.Mutex 56 57 waitBlock chan struct{} 58 59 WorkDir string 60 61 id string 62 Bundle string 63 console console.Console 64 Platform stdio.Platform 65 io runc.IO 66 runtime *runsc.Runsc 67 status int 68 exited time.Time 69 pid int 70 closers []io.Closer 71 stdin io.Closer 72 stdio stdio.Stdio 73 Rootfs string 74 IoUID int 75 IoGID int 76 Sandbox bool 77 UserLog string 78 Monitor ProcessMonitor 79 } 80 81 // NewRunsc returns a new runsc instance for a process. 82 func NewRunsc(root, path, namespace, runtime string, config map[string]string, spec *specs.Spec) *runsc.Runsc { 83 if root == "" { 84 root = RunscRoot 85 } 86 return &runsc.Runsc{ 87 Command: runtime, 88 PdeathSignal: unix.SIGKILL, 89 Log: filepath.Join(path, "log.json"), 90 LogFormat: runc.JSON, 91 PanicLog: utils.PanicLogPath(spec), 92 Root: filepath.Join(root, namespace), 93 Config: config, 94 } 95 } 96 97 // New returns a new init process. 98 func New(id string, runtime *runsc.Runsc, stdio stdio.Stdio) *Init { 99 p := &Init{ 100 id: id, 101 runtime: runtime, 102 stdio: stdio, 103 status: 0, 104 waitBlock: make(chan struct{}), 105 } 106 p.initState = &createdState{p: p} 107 return p 108 } 109 110 // Create the process with the provided config. 111 func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { 112 var socket *runc.Socket 113 if r.Terminal { 114 if socket, err = runc.NewTempConsoleSocket(); err != nil { 115 return fmt.Errorf("failed to create OCI runtime console socket: %w", err) 116 } 117 defer socket.Close() 118 } else if hasNoIO(r) { 119 if p.io, err = runc.NewNullIO(); err != nil { 120 return fmt.Errorf("creating new NULL IO: %w", err) 121 } 122 } else { 123 if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { 124 return fmt.Errorf("failed to create OCI runtime io pipes: %w", err) 125 } 126 } 127 // pidFile is the file that will contain the sandbox pid. 128 pidFile := filepath.Join(p.Bundle, "init.pid") 129 opts := &runsc.CreateOpts{ 130 PidFile: pidFile, 131 } 132 if socket != nil { 133 opts.ConsoleSocket = socket 134 } 135 if p.Sandbox { 136 opts.IO = p.io 137 // UserLog is only useful for sandbox. 138 opts.UserLog = p.UserLog 139 } 140 if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { 141 return p.runtimeError(err, "OCI runtime create failed") 142 } 143 if r.Stdin != "" { 144 sc, err := fifo.OpenFifo(context.Background(), r.Stdin, unix.O_WRONLY|unix.O_NONBLOCK, 0) 145 if err != nil { 146 return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err) 147 } 148 p.stdin = sc 149 p.closers = append(p.closers, sc) 150 } 151 ctx, cancel := context.WithTimeout(ctx, 30*time.Second) 152 defer cancel() 153 if socket != nil { 154 console, err := socket.ReceiveMaster() 155 if err != nil { 156 return fmt.Errorf("failed to retrieve console master: %w", err) 157 } 158 console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg) 159 if err != nil { 160 return fmt.Errorf("failed to start console copy: %w", err) 161 } 162 p.console = console 163 } else if !hasNoIO(r) { 164 if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg); err != nil { 165 return fmt.Errorf("failed to start io pipe copy: %w", err) 166 } 167 } 168 pid, err := runc.ReadPidFile(pidFile) 169 if err != nil { 170 return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err) 171 } 172 p.pid = pid 173 return nil 174 } 175 176 // Wait waits for the process to exit. 177 func (p *Init) Wait() { 178 <-p.waitBlock 179 } 180 181 // ID returns the ID of the process. 182 func (p *Init) ID() string { 183 return p.id 184 } 185 186 // Pid returns the PID of the process. 187 func (p *Init) Pid() int { 188 return p.pid 189 } 190 191 // ExitStatus returns the exit status of the process. 192 func (p *Init) ExitStatus() int { 193 p.mu.Lock() 194 defer p.mu.Unlock() 195 return p.status 196 } 197 198 // ExitedAt returns the time when the process exited. 199 func (p *Init) ExitedAt() time.Time { 200 p.mu.Lock() 201 defer p.mu.Unlock() 202 return p.exited 203 } 204 205 // Status returns the status of the process. 206 func (p *Init) Status(ctx context.Context) (string, error) { 207 p.mu.Lock() 208 defer p.mu.Unlock() 209 210 return p.initState.State(ctx) 211 } 212 213 func (p *Init) state(ctx context.Context) (string, error) { 214 c, err := p.runtime.State(ctx, p.id) 215 if err != nil { 216 if strings.Contains(err.Error(), "does not exist") { 217 return statusStopped, nil 218 } 219 return "", p.runtimeError(err, "OCI runtime state failed") 220 } 221 return p.convertStatus(c.Status), nil 222 } 223 224 // Start starts the init process. 225 func (p *Init) Start(ctx context.Context) error { 226 p.mu.Lock() 227 defer p.mu.Unlock() 228 229 return p.initState.Start(ctx) 230 } 231 232 func (p *Init) start(ctx context.Context) error { 233 var cio runc.IO 234 if !p.Sandbox { 235 cio = p.io 236 } 237 if err := p.runtime.Start(ctx, p.id, cio); err != nil { 238 return p.runtimeError(err, "OCI runtime start failed") 239 } 240 go func() { 241 status, err := p.runtime.Wait(context.Background(), p.id) 242 if err != nil { 243 log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) 244 p.killAllLocked(ctx) 245 status = internalErrorCode 246 } 247 ExitCh <- Exit{ 248 Timestamp: time.Now(), 249 ID: p.id, 250 Status: status, 251 } 252 }() 253 return nil 254 } 255 256 // SetExited set the exit stauts of the init process. 257 func (p *Init) SetExited(status int) { 258 p.mu.Lock() 259 defer p.mu.Unlock() 260 261 p.initState.SetExited(status) 262 } 263 264 func (p *Init) setExited(status int) { 265 if !p.exited.IsZero() { 266 log.L.Debugf("Status already set to %d, ignoring status: %d", p.status, status) 267 return 268 } 269 270 log.L.Debugf("Setting status: %d", status) 271 p.exited = time.Now() 272 p.status = status 273 p.Platform.ShutdownConsole(context.Background(), p.console) 274 close(p.waitBlock) 275 } 276 277 // Delete deletes the init process. 278 func (p *Init) Delete(ctx context.Context) error { 279 p.mu.Lock() 280 defer p.mu.Unlock() 281 282 return p.initState.Delete(ctx) 283 } 284 285 func (p *Init) delete(ctx context.Context) error { 286 p.killAllLocked(ctx) 287 p.wg.Wait() 288 289 err := p.runtime.Delete(ctx, p.id, nil) 290 if err != nil { 291 // ignore errors if a runtime has already deleted the process 292 // but we still hold metadata and pipes 293 // 294 // this is common during a checkpoint, runc will delete the container state 295 // after a checkpoint and the container will no longer exist within runc 296 if strings.Contains(err.Error(), "does not exist") { 297 err = nil 298 } else { 299 err = p.runtimeError(err, "failed to delete task") 300 } 301 } 302 if p.io != nil { 303 for _, c := range p.closers { 304 c.Close() 305 } 306 p.io.Close() 307 } 308 if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { 309 log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") 310 if err == nil { 311 err = fmt.Errorf("failed rootfs umount: %w", err2) 312 } 313 } 314 return err 315 } 316 317 // Resize resizes the init processes console. 318 func (p *Init) Resize(ws console.WinSize) error { 319 p.mu.Lock() 320 defer p.mu.Unlock() 321 322 if p.console == nil { 323 return nil 324 } 325 return p.console.Resize(ws) 326 } 327 328 func (p *Init) resize(ws console.WinSize) error { 329 if p.console == nil { 330 return nil 331 } 332 return p.console.Resize(ws) 333 } 334 335 // Kill kills the init process. 336 func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { 337 p.mu.Lock() 338 defer p.mu.Unlock() 339 340 return p.initState.Kill(ctx, signal, all) 341 } 342 343 func (p *Init) kill(ctx context.Context, signal uint32, all bool) error { 344 var ( 345 killErr error 346 backoff = 100 * time.Millisecond 347 ) 348 const timeout = time.Second 349 for start := time.Now(); time.Since(start) < timeout; { 350 state, err := p.initState.State(ctx) 351 if err != nil { 352 return p.runtimeError(err, "OCI runtime state failed") 353 } 354 // For runsc, signal only works when container is running state. 355 // If the container is not in running state, directly return 356 // "no such process" 357 if state == statusStopped { 358 return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) 359 } 360 killErr = p.runtime.Kill(ctx, p.id, int(signal), &runsc.KillOpts{All: all}) 361 if killErr == nil { 362 return nil 363 } 364 time.Sleep(backoff) 365 backoff *= 2 366 } 367 return p.runtimeError(killErr, "kill timeout") 368 } 369 370 // KillAll kills all processes belonging to the init process. If 371 // `runsc kill --all` returns error, assume the container has already stopped. 372 func (p *Init) KillAll(context context.Context) { 373 p.mu.Lock() 374 defer p.mu.Unlock() 375 p.killAllLocked(context) 376 } 377 378 func (p *Init) killAllLocked(context context.Context) { 379 if err := p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{All: true}); err != nil { 380 log.L.Warningf("Ignoring error killing container %q: %v", p.id, err) 381 } 382 } 383 384 // Stdin returns the stdin of the process. 385 func (p *Init) Stdin() io.Closer { 386 return p.stdin 387 } 388 389 // Runtime returns the OCI runtime configured for the init process. 390 func (p *Init) Runtime() *runsc.Runsc { 391 return p.runtime 392 } 393 394 // Exec returns a new child process. 395 func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { 396 p.mu.Lock() 397 defer p.mu.Unlock() 398 399 return p.initState.Exec(ctx, path, r) 400 } 401 402 // exec returns a new exec'd process. 403 func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) { 404 var spec specs.Process 405 if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { 406 return nil, err 407 } 408 spec.Terminal = r.Terminal 409 410 e := &execProcess{ 411 id: r.ID, 412 path: path, 413 parent: p, 414 spec: spec, 415 stdio: stdio.Stdio{ 416 Stdin: r.Stdin, 417 Stdout: r.Stdout, 418 Stderr: r.Stderr, 419 Terminal: r.Terminal, 420 }, 421 waitBlock: make(chan struct{}), 422 } 423 e.execState = &execCreatedState{p: e} 424 return e, nil 425 } 426 427 func (p *Init) Stats(ctx context.Context, id string) (*runc.Stats, error) { 428 p.mu.Lock() 429 defer p.mu.Unlock() 430 431 return p.initState.Stats(ctx, id) 432 } 433 434 func (p *Init) stats(ctx context.Context, id string) (*runc.Stats, error) { 435 return p.Runtime().Stats(ctx, id) 436 } 437 438 // Stdio returns the stdio of the process. 439 func (p *Init) Stdio() stdio.Stdio { 440 return p.stdio 441 } 442 443 func (p *Init) runtimeError(rErr error, msg string) error { 444 if rErr == nil { 445 return nil 446 } 447 448 rMsg, err := getLastRuntimeError(p.runtime) 449 switch { 450 case err != nil: 451 return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err) 452 case rMsg == "": 453 return fmt.Errorf("%s: %w", msg, rErr) 454 default: 455 return fmt.Errorf("%s: %s", msg, rMsg) 456 } 457 } 458 459 func (p *Init) convertStatus(status string) string { 460 if status == "created" && !p.Sandbox && p.status == internalErrorCode { 461 // Treat start failure state for non-root container as stopped. 462 return statusStopped 463 } 464 return status 465 } 466 467 func withConditionalIO(c stdio.Stdio) runc.IOOpt { 468 return func(o *runc.IOOption) { 469 o.OpenStdin = c.Stdin != "" 470 o.OpenStdout = c.Stdout != "" 471 o.OpenStderr = c.Stderr != "" 472 } 473 }