github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/shim/proc/init.go (about) 1 // Copyright 2018 The containerd Authors. 2 // Copyright 2018 The gVisor Authors. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // https://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package proc 17 18 import ( 19 "context" 20 "encoding/json" 21 "fmt" 22 "io" 23 "path/filepath" 24 "strings" 25 "sync" 26 "time" 27 28 "github.com/containerd/console" 29 "github.com/containerd/containerd/errdefs" 30 "github.com/containerd/containerd/log" 31 "github.com/containerd/containerd/mount" 32 "github.com/containerd/containerd/pkg/process" 33 "github.com/containerd/containerd/pkg/stdio" 34 "github.com/containerd/fifo" 35 runc "github.com/containerd/go-runc" 36 specs "github.com/opencontainers/runtime-spec/specs-go" 37 "golang.org/x/sys/unix" 38 39 "github.com/SagerNet/gvisor/pkg/shim/runsc" 40 ) 41 42 const statusStopped = "stopped" 43 44 // Init represents an initial process for a container. 45 type Init struct { 46 wg sync.WaitGroup 47 initState initState 48 49 // mu is used to ensure that `Start()` and `Exited()` calls return in 50 // the right order when invoked in separate go routines. This is the 51 // case within the shim implementation as it makes use of the reaper 52 // interface. 53 mu sync.Mutex 54 55 waitBlock chan struct{} 56 57 WorkDir string 58 59 id string 60 Bundle string 61 console console.Console 62 Platform stdio.Platform 63 io runc.IO 64 runtime *runsc.Runsc 65 status int 66 exited time.Time 67 pid int 68 closers []io.Closer 69 stdin io.Closer 70 stdio stdio.Stdio 71 Rootfs string 72 IoUID int 73 IoGID int 74 Sandbox bool 75 UserLog string 76 Monitor ProcessMonitor 77 } 78 79 // NewRunsc returns a new runsc instance for a process. 80 func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { 81 if root == "" { 82 root = RunscRoot 83 } 84 return &runsc.Runsc{ 85 Command: runtime, 86 PdeathSignal: unix.SIGKILL, 87 Log: filepath.Join(path, "log.json"), 88 LogFormat: runc.JSON, 89 Root: filepath.Join(root, namespace), 90 Config: config, 91 } 92 } 93 94 // New returns a new init process. 95 func New(id string, runtime *runsc.Runsc, stdio stdio.Stdio) *Init { 96 p := &Init{ 97 id: id, 98 runtime: runtime, 99 stdio: stdio, 100 status: 0, 101 waitBlock: make(chan struct{}), 102 } 103 p.initState = &createdState{p: p} 104 return p 105 } 106 107 // Create the process with the provided config. 108 func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { 109 var socket *runc.Socket 110 if r.Terminal { 111 if socket, err = runc.NewTempConsoleSocket(); err != nil { 112 return fmt.Errorf("failed to create OCI runtime console socket: %w", err) 113 } 114 defer socket.Close() 115 } else if hasNoIO(r) { 116 if p.io, err = runc.NewNullIO(); err != nil { 117 return fmt.Errorf("creating new NULL IO: %w", err) 118 } 119 } else { 120 if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { 121 return fmt.Errorf("failed to create OCI runtime io pipes: %w", err) 122 } 123 } 124 // pidFile is the file that will contain the sandbox pid. 125 pidFile := filepath.Join(p.Bundle, "init.pid") 126 opts := &runsc.CreateOpts{ 127 PidFile: pidFile, 128 } 129 if socket != nil { 130 opts.ConsoleSocket = socket 131 } 132 if p.Sandbox { 133 opts.IO = p.io 134 // UserLog is only useful for sandbox. 135 opts.UserLog = p.UserLog 136 } 137 if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { 138 return p.runtimeError(err, "OCI runtime create failed") 139 } 140 if r.Stdin != "" { 141 sc, err := fifo.OpenFifo(context.Background(), r.Stdin, unix.O_WRONLY|unix.O_NONBLOCK, 0) 142 if err != nil { 143 return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err) 144 } 145 p.stdin = sc 146 p.closers = append(p.closers, sc) 147 } 148 ctx, cancel := context.WithTimeout(ctx, 30*time.Second) 149 defer cancel() 150 if socket != nil { 151 console, err := socket.ReceiveMaster() 152 if err != nil { 153 return fmt.Errorf("failed to retrieve console master: %w", err) 154 } 155 console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg) 156 if err != nil { 157 return fmt.Errorf("failed to start console copy: %w", err) 158 } 159 p.console = console 160 } else if !hasNoIO(r) { 161 if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg); err != nil { 162 return fmt.Errorf("failed to start io pipe copy: %w", err) 163 } 164 } 165 pid, err := runc.ReadPidFile(pidFile) 166 if err != nil { 167 return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err) 168 } 169 p.pid = pid 170 return nil 171 } 172 173 // Wait waits for the process to exit. 174 func (p *Init) Wait() { 175 <-p.waitBlock 176 } 177 178 // ID returns the ID of the process. 179 func (p *Init) ID() string { 180 return p.id 181 } 182 183 // Pid returns the PID of the process. 184 func (p *Init) Pid() int { 185 return p.pid 186 } 187 188 // ExitStatus returns the exit status of the process. 189 func (p *Init) ExitStatus() int { 190 p.mu.Lock() 191 defer p.mu.Unlock() 192 return p.status 193 } 194 195 // ExitedAt returns the time when the process exited. 196 func (p *Init) ExitedAt() time.Time { 197 p.mu.Lock() 198 defer p.mu.Unlock() 199 return p.exited 200 } 201 202 // Status returns the status of the process. 203 func (p *Init) Status(ctx context.Context) (string, error) { 204 p.mu.Lock() 205 defer p.mu.Unlock() 206 207 return p.initState.State(ctx) 208 } 209 210 func (p *Init) state(ctx context.Context) (string, error) { 211 c, err := p.runtime.State(ctx, p.id) 212 if err != nil { 213 if strings.Contains(err.Error(), "does not exist") { 214 return statusStopped, nil 215 } 216 return "", p.runtimeError(err, "OCI runtime state failed") 217 } 218 return p.convertStatus(c.Status), nil 219 } 220 221 // Start starts the init process. 222 func (p *Init) Start(ctx context.Context) error { 223 p.mu.Lock() 224 defer p.mu.Unlock() 225 226 return p.initState.Start(ctx) 227 } 228 229 func (p *Init) start(ctx context.Context) error { 230 var cio runc.IO 231 if !p.Sandbox { 232 cio = p.io 233 } 234 if err := p.runtime.Start(ctx, p.id, cio); err != nil { 235 return p.runtimeError(err, "OCI runtime start failed") 236 } 237 go func() { 238 status, err := p.runtime.Wait(context.Background(), p.id) 239 if err != nil { 240 log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) 241 p.killAllLocked(ctx) 242 status = internalErrorCode 243 } 244 ExitCh <- Exit{ 245 Timestamp: time.Now(), 246 ID: p.id, 247 Status: status, 248 } 249 }() 250 return nil 251 } 252 253 // SetExited set the exit stauts of the init process. 254 func (p *Init) SetExited(status int) { 255 p.mu.Lock() 256 defer p.mu.Unlock() 257 258 p.initState.SetExited(status) 259 } 260 261 func (p *Init) setExited(status int) { 262 if !p.exited.IsZero() { 263 log.L.Debugf("Status already set to %d, ignoring status: %d", p.status, status) 264 return 265 } 266 267 log.L.Debugf("Setting status: %d", status) 268 p.exited = time.Now() 269 p.status = status 270 p.Platform.ShutdownConsole(context.Background(), p.console) 271 close(p.waitBlock) 272 } 273 274 // Delete deletes the init process. 275 func (p *Init) Delete(ctx context.Context) error { 276 p.mu.Lock() 277 defer p.mu.Unlock() 278 279 return p.initState.Delete(ctx) 280 } 281 282 func (p *Init) delete(ctx context.Context) error { 283 p.killAllLocked(ctx) 284 p.wg.Wait() 285 286 err := p.runtime.Delete(ctx, p.id, nil) 287 if err != nil { 288 // ignore errors if a runtime has already deleted the process 289 // but we still hold metadata and pipes 290 // 291 // this is common during a checkpoint, runc will delete the container state 292 // after a checkpoint and the container will no longer exist within runc 293 if strings.Contains(err.Error(), "does not exist") { 294 err = nil 295 } else { 296 err = p.runtimeError(err, "failed to delete task") 297 } 298 } 299 if p.io != nil { 300 for _, c := range p.closers { 301 c.Close() 302 } 303 p.io.Close() 304 } 305 if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { 306 log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") 307 if err == nil { 308 err = fmt.Errorf("failed rootfs umount: %w", err2) 309 } 310 } 311 return err 312 } 313 314 // Resize resizes the init processes console. 315 func (p *Init) Resize(ws console.WinSize) error { 316 p.mu.Lock() 317 defer p.mu.Unlock() 318 319 if p.console == nil { 320 return nil 321 } 322 return p.console.Resize(ws) 323 } 324 325 func (p *Init) resize(ws console.WinSize) error { 326 if p.console == nil { 327 return nil 328 } 329 return p.console.Resize(ws) 330 } 331 332 // Kill kills the init process. 333 func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { 334 p.mu.Lock() 335 defer p.mu.Unlock() 336 337 return p.initState.Kill(ctx, signal, all) 338 } 339 340 func (p *Init) kill(ctx context.Context, signal uint32, all bool) error { 341 var ( 342 killErr error 343 backoff = 100 * time.Millisecond 344 ) 345 const timeout = time.Second 346 for start := time.Now(); time.Since(start) < timeout; { 347 state, err := p.initState.State(ctx) 348 if err != nil { 349 return p.runtimeError(err, "OCI runtime state failed") 350 } 351 // For runsc, signal only works when container is running state. 352 // If the container is not in running state, directly return 353 // "no such process" 354 if state == statusStopped { 355 return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) 356 } 357 killErr = p.runtime.Kill(ctx, p.id, int(signal), &runsc.KillOpts{All: all}) 358 if killErr == nil { 359 return nil 360 } 361 time.Sleep(backoff) 362 backoff *= 2 363 } 364 return p.runtimeError(killErr, "kill timeout") 365 } 366 367 // KillAll kills all processes belonging to the init process. If 368 // `runsc kill --all` returns error, assume the container has already stopped. 369 func (p *Init) KillAll(context context.Context) { 370 p.mu.Lock() 371 defer p.mu.Unlock() 372 p.killAllLocked(context) 373 } 374 375 func (p *Init) killAllLocked(context context.Context) { 376 if err := p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{All: true}); err != nil { 377 log.L.Warningf("Ignoring error killing container %q: %v", p.id, err) 378 } 379 } 380 381 // Stdin returns the stdin of the process. 382 func (p *Init) Stdin() io.Closer { 383 return p.stdin 384 } 385 386 // Runtime returns the OCI runtime configured for the init process. 387 func (p *Init) Runtime() *runsc.Runsc { 388 return p.runtime 389 } 390 391 // Exec returns a new child process. 392 func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { 393 p.mu.Lock() 394 defer p.mu.Unlock() 395 396 return p.initState.Exec(ctx, path, r) 397 } 398 399 // exec returns a new exec'd process. 400 func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) { 401 var spec specs.Process 402 if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { 403 return nil, err 404 } 405 spec.Terminal = r.Terminal 406 407 e := &execProcess{ 408 id: r.ID, 409 path: path, 410 parent: p, 411 spec: spec, 412 stdio: stdio.Stdio{ 413 Stdin: r.Stdin, 414 Stdout: r.Stdout, 415 Stderr: r.Stderr, 416 Terminal: r.Terminal, 417 }, 418 waitBlock: make(chan struct{}), 419 } 420 e.execState = &execCreatedState{p: e} 421 return e, nil 422 } 423 424 func (p *Init) Stats(ctx context.Context, id string) (*runc.Stats, error) { 425 p.mu.Lock() 426 defer p.mu.Unlock() 427 428 return p.initState.Stats(ctx, id) 429 } 430 431 func (p *Init) stats(ctx context.Context, id string) (*runc.Stats, error) { 432 return p.Runtime().Stats(ctx, id) 433 } 434 435 // Stdio returns the stdio of the process. 436 func (p *Init) Stdio() stdio.Stdio { 437 return p.stdio 438 } 439 440 func (p *Init) runtimeError(rErr error, msg string) error { 441 if rErr == nil { 442 return nil 443 } 444 445 rMsg, err := getLastRuntimeError(p.runtime) 446 switch { 447 case err != nil: 448 return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err) 449 case rMsg == "": 450 return fmt.Errorf("%s: %w", msg, rErr) 451 default: 452 return fmt.Errorf("%s: %s", msg, rMsg) 453 } 454 } 455 456 func (p *Init) convertStatus(status string) string { 457 if status == "created" && !p.Sandbox && p.status == internalErrorCode { 458 // Treat start failure state for non-root container as stopped. 459 return statusStopped 460 } 461 return status 462 } 463 464 func withConditionalIO(c stdio.Stdio) runc.IOOpt { 465 return func(o *runc.IOOption) { 466 o.OpenStdin = c.Stdin != "" 467 o.OpenStdout = c.Stdout != "" 468 o.OpenStderr = c.Stderr != "" 469 } 470 }