github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/vm/gvisor/gvisor.go

// Copyright 2018 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Package gvisor provides support for testing gVisor (a user-space kernel).
// See https://github.com/google/gvisor
package gvisor

import (
	"bytes"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	"github.com/google/syzkaller/pkg/config"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	"github.com/google/syzkaller/vm/vmimpl"
)

func init() {
	vmimpl.Register("gvisor", ctor, true)
}

type Config struct {
	Count            int    `json:"count"` // number of VMs to use
	RunscArgs        string `json:"runsc_args"`
	MemoryTotalBytes uint64 `json:"memory_total_bytes"`
}
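
// For reference, these fields are set via the "vm" object of the syzkaller
// manager config (field names follow the json tags above). A minimal
// illustrative snippet; the values and the runsc_args flag are hypothetical
// examples, not defaults taken from this file:
//
//	"type": "gvisor",
//	"vm": {
//		"count": 4,
//		"runsc_args": "-platform=ptrace",
//		"memory_total_bytes": 2000000000
//	}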

type Pool struct {
	env *vmimpl.Env
	cfg *Config
}

type instance struct {
	cfg      *Config
	image    string
	debug    bool
	rootDir  string
	imageDir string
	name     string
	port     int
	cmd      *exec.Cmd
	merger   *vmimpl.OutputMerger
}

func ctor(env *vmimpl.Env) (vmimpl.Pool, error) {
	cfg := &Config{
		Count: 1,
	}
	if err := config.LoadData(env.Config, cfg); err != nil {
		return nil, fmt.Errorf("failed to parse vm config: %w", err)
	}
	if cfg.Count < 1 || cfg.Count > 128 {
		return nil, fmt.Errorf("invalid config param count: %v, want [1, 128]", cfg.Count)
	}
	hostTotalMemory := osutil.SystemMemorySize()
	minMemory := uint64(cfg.Count) * 10_000_000
	if cfg.MemoryTotalBytes != 0 && (cfg.MemoryTotalBytes < minMemory || cfg.MemoryTotalBytes > hostTotalMemory) {
		return nil, fmt.Errorf("invalid config param memory_total_bytes: %v, want [%d,%d]",
			cfg.MemoryTotalBytes, minMemory, hostTotalMemory)
	}
	if env.Debug && cfg.Count > 1 {
		log.Logf(0, "limiting number of VMs from %v to 1 in debug mode", cfg.Count)
		cfg.Count = 1
	}
	if !osutil.IsExist(env.Image) {
		return nil, fmt.Errorf("image file %q does not exist", env.Image)
	}
	pool := &Pool{
		cfg: cfg,
		env: env,
	}
	return pool, nil
}

func (pool *Pool) Count() int {
	return pool.cfg.Count
}

func (pool *Pool) Create(workdir string, index int) (vmimpl.Instance, error) {
	rootDir := filepath.Clean(filepath.Join(workdir, "..", "gvisor_root"))
	imageDir := filepath.Join(workdir, "image")
	bundleDir := filepath.Join(workdir, "bundle")
	osutil.MkdirAll(rootDir)
	osutil.MkdirAll(bundleDir)
	osutil.MkdirAll(imageDir)

	caps := ""
	for _, c := range sandboxCaps {
		if caps != "" {
			caps += ", "
		}
		caps += "\"" + c + "\""
	}
	name := fmt.Sprintf("%v-%v", pool.env.Name, index)
	memoryLimit := int64(pool.cfg.MemoryTotalBytes / uint64(pool.Count()))
	if pool.cfg.MemoryTotalBytes == 0 {
		memoryLimit = -1
	}
	vmConfig := fmt.Sprintf(configTempl, imageDir, caps, name, memoryLimit)
	if err := osutil.WriteFile(filepath.Join(bundleDir, "config.json"), []byte(vmConfig)); err != nil {
		return nil, err
	}
	bin, err := exec.LookPath(os.Args[0])
	if err != nil {
		return nil, fmt.Errorf("failed to lookup %v: %w", os.Args[0], err)
	}
	if err := osutil.CopyFile(bin, filepath.Join(imageDir, "init")); err != nil {
		return nil, err
	}

	panicLog := filepath.Join(bundleDir, "panic.fifo")
	if err := syscall.Mkfifo(panicLog, 0666); err != nil {
		return nil, err
	}
	defer syscall.Unlink(panicLog)

	// Open the fifo for read-write to be able to open for read-only
	// without blocking.
	panicLogWriteFD, err := os.OpenFile(panicLog, os.O_RDWR, 0)
	if err != nil {
		return nil, err
	}
	defer panicLogWriteFD.Close()

	panicLogReadFD, err := os.Open(panicLog)
	if err != nil {
		return nil, err
	}

	rpipe, wpipe, err := osutil.LongPipe()
	if err != nil {
		panicLogReadFD.Close()
		return nil, err
	}
	var tee io.Writer
	if pool.env.Debug {
		tee = os.Stdout
	}
	merger := vmimpl.NewOutputMerger(tee)
	merger.Add("gvisor", rpipe)
	merger.Add("gvisor-goruntime", panicLogReadFD)

	inst := &instance{
		cfg:      pool.cfg,
		image:    pool.env.Image,
		debug:    pool.env.Debug,
		rootDir:  rootDir,
		imageDir: imageDir,
		name:     name,
		merger:   merger,
	}

	// Kill the previous instance in case it's still running.
	osutil.Run(time.Minute, inst.runscCmd("delete", "-force", inst.name))
	time.Sleep(3 * time.Second)

	cmd := inst.runscCmd("--panic-log", panicLog, "run", "-bundle", bundleDir, inst.name)
	cmd.Stdout = wpipe
	cmd.Stderr = wpipe
	if err := cmd.Start(); err != nil {
		wpipe.Close()
		panicLogWriteFD.Close()
		merger.Wait()
		return nil, err
	}
	inst.cmd = cmd
	wpipe.Close()

	if err := inst.waitBoot(); err != nil {
		panicLogWriteFD.Close()
		inst.Close()
		return nil, err
	}
	return inst, nil
}
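
// waitBoot consumes the merged runsc output until the init process inside the
// sandbox prints initStartMsg. A "FATAL ERROR:" line, an output-merger error,
// or a one-minute timeout is reported as a vmimpl.BootError instead.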
func (inst *instance) waitBoot() error {
	errorMsg := []byte("FATAL ERROR:")
	bootedMsg := []byte(initStartMsg)
	timeout := time.NewTimer(time.Minute)
	defer timeout.Stop()
	var output []byte
	for {
		select {
		case out := <-inst.merger.Output:
			output = append(output, out...)
			if pos := bytes.Index(output, errorMsg); pos != -1 {
				end := bytes.IndexByte(output[pos:], '\n')
				if end == -1 {
					end = len(output)
				} else {
					end += pos
				}
				return vmimpl.BootError{
					Title:  string(output[pos:end]),
					Output: output,
				}
			}
			if bytes.Contains(output, bootedMsg) {
				return nil
			}
		case err := <-inst.merger.Err:
			return vmimpl.BootError{
				Title:  fmt.Sprintf("runsc failed: %v", err),
				Output: output,
			}
		case <-timeout.C:
			return vmimpl.BootError{
				Title:  "init process did not start",
				Output: output,
			}
		}
	}
}

func (inst *instance) args() []string {
	args := []string{
		"-root", inst.rootDir,
		"-watchdog-action=panic",
		"-network=none",
		"-debug",
		// Send debug logs to stderr, so that they will be picked up by
		// syzkaller. Without this, debug logs are sent to /dev/null.
		"-debug-log=/dev/stderr",
	}
	if inst.cfg.RunscArgs != "" {
		args = append(args, strings.Split(inst.cfg.RunscArgs, " ")...)
	}
	return args
}

func (inst *instance) Info() ([]byte, error) {
	info := fmt.Sprintf("%v %v\n", inst.image, strings.Join(inst.args(), " "))
	return []byte(info), nil
}

func (inst *instance) PprofPort() int {
	// Some of the gVisor instances use the host's network namespace, which
	// results in conflicting bind operations on the same HTTP port.
	// Until there's an actual need to debug gVisor VMs with pprof, let's
	// just disable it.
	return 0
}

func (inst *instance) runscCmd(add ...string) *exec.Cmd {
	cmd := osutil.Command(inst.image, append(inst.args(), add...)...)
	cmd.Env = []string{
		"GOTRACEBACK=all",
		"GORACE=halt_on_error=1",
		// New glibc versions enable rseq by default, but the ptrace and
		// systrap platforms don't work in that case. runsc is linked with
		// libc only when the race detector is enabled.
		"GLIBC_TUNABLES=glibc.pthread.rseq=0",
	}
	return cmd
}

func (inst *instance) Close() {
	time.Sleep(3 * time.Second)
	osutil.Run(time.Minute, inst.runscCmd("delete", "-force", inst.name))
	inst.cmd.Process.Kill()
	inst.merger.Wait()
	inst.cmd.Wait()
	osutil.Run(time.Minute, inst.runscCmd("delete", "-force", inst.name))
	time.Sleep(3 * time.Second)
}

func (inst *instance) Forward(port int) (string, error) {
	if inst.port != 0 {
		return "", fmt.Errorf("forward port is already setup")
	}
	inst.port = port
	return "stdin", nil
}

func (inst *instance) Copy(hostSrc string) (string, error) {
	fname := filepath.Base(hostSrc)
	if err := osutil.CopyFile(hostSrc, filepath.Join(inst.imageDir, fname)); err != nil {
		return "", err
	}
	if err := os.Chmod(inst.imageDir, 0777); err != nil {
		return "", err
	}
	return filepath.Join("/", fname), nil
}

func (inst *instance) Run(timeout time.Duration, stop <-chan bool, command string) (
	<-chan []byte, <-chan error, error) {
	args := []string{"exec", "-user=0:0"}
	for _, c := range sandboxCaps {
		args = append(args, "-cap", c)
	}
	args = append(args, inst.name)
	args = append(args, strings.Split(command, " ")...)
	cmd := inst.runscCmd(args...)

	rpipe, wpipe, err := osutil.LongPipe()
	if err != nil {
		return nil, nil, err
	}
	defer wpipe.Close()
	inst.merger.Add("cmd", rpipe)
	cmd.Stdout = wpipe
	cmd.Stderr = wpipe

	guestSock, err := inst.guestProxy()
	if err != nil {
		return nil, nil, err
	}
	if guestSock != nil {
		defer guestSock.Close()
		cmd.Stdin = guestSock
	}

	if err := cmd.Start(); err != nil {
		return nil, nil, err
	}
	errc := make(chan error, 1)
	signal := func(err error) {
		select {
		case errc <- err:
		default:
		}
	}

	go func() {
		select {
		case <-time.After(timeout):
			signal(vmimpl.ErrTimeout)
		case <-stop:
			signal(vmimpl.ErrTimeout)
		case err := <-inst.merger.Err:
			cmd.Process.Kill()
			if cmdErr := cmd.Wait(); cmdErr == nil {
				// If the command exited successfully, we got an EOF error from the merger.
				// But in this case no error has happened and the EOF is expected.
				err = nil
			}
			signal(err)
			return
		}
		log.Logf(1, "stopping %s", inst.name)
		w := make(chan bool)
		go func() {
			select {
			case <-w:
				return
			case <-time.After(time.Minute):
				cmd.Process.Kill()
			}
		}()
		osutil.Run(time.Minute, inst.runscCmd("kill", inst.name, "9"))
		err := cmd.Wait()
		close(w)
		log.Logf(1, "%s exited with %s", inst.name, err)
	}()
	return inst.merger.Output, errc, nil
}

func (inst *instance) guestProxy() (*os.File, error) {
	if inst.port == 0 {
		return nil, nil
	}
	// One does not simply let gvisor guest connect to host tcp port.
	// We create a unix socket, pass it to guest in stdin.
	// Guest will use it instead of dialing manager directly.
	// On host we connect to manager tcp port and proxy between the tcp and unix connections.
	socks, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0)
	if err != nil {
		return nil, err
	}
	hostSock := os.NewFile(uintptr(socks[0]), "host unix proxy")
	guestSock := os.NewFile(uintptr(socks[1]), "guest unix proxy")
	conn, err := net.Dial("tcp", fmt.Sprintf("localhost:%v", inst.port))
	if err != nil {
		hostSock.Close()
		guestSock.Close()
		return nil, err
	}
	go func() {
		io.Copy(hostSock, conn)
		hostSock.Close()
	}()
	go func() {
		io.Copy(conn, hostSock)
		conn.Close()
	}()
	return guestSock, nil
}

func (inst *instance) Diagnose(rep *report.Report) ([]byte, bool) {
	// TODO: stacks and dmesg are mostly useful for hangs/stalls, so we could do this only sometimes based on rep.
	b, err := osutil.Run(time.Minute, inst.runscCmd("debug", "-stacks", "--ps", inst.name))
	if err != nil {
		b = append(b, fmt.Sprintf("\n\nError collecting stacks: %v", err)...)
	}
	b1, err := osutil.RunCmd(time.Minute, "", "dmesg")
	b = append(b, b1...)
	if err != nil {
		b = append(b, fmt.Sprintf("\n\nError collecting kernel logs: %v", err)...)
	}
	return b, false
}

func init() {
	if os.Getenv("SYZ_GVISOR_PROXY") != "" {
		fmt.Fprint(os.Stderr, initStartMsg)
		// If we do select{}, we can get a deadlock panic.
		for range time.NewTicker(time.Hour).C {
		}
	}
}

const initStartMsg = "SYZKALLER INIT STARTED\n"
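
// configTempl is the OCI bundle config.json passed to runsc. The positional
// arguments filled in by Create are: %[1]v - the image directory used as the
// read-only root, %[2]v - the quoted, comma-separated capability list built
// from sandboxCaps, %[3]v - the instance name (used as cgroupsPath), and
// %[4]d - the per-VM memory limit in bytes (-1 when no limit is configured).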
const configTempl = `
{
	"root": {
		"path": "%[1]v",
		"readonly": true
	},
	"linux": {
		"cgroupsPath": "%[3]v",
		"resources": {
			"cpu": {
				"shares": 1024
			},
			"memory": {
				"limit": %[4]d,
				"reservation": %[4]d,
				"disableOOMKiller": false
			}
		},
		"sysctl": {
			"fs.nr_open": "1048576"
		}
	},
	"process":{
		"args": ["/init"],
		"cwd": "/tmp",
		"env": ["SYZ_GVISOR_PROXY=1"],
		"capabilities": {
			"bounding": [%[2]v],
			"effective": [%[2]v],
			"inheritable": [%[2]v],
			"permitted": [%[2]v],
			"ambient": [%[2]v]
		}
	}
}
`

var sandboxCaps = []string{
	"CAP_CHOWN", "CAP_DAC_OVERRIDE", "CAP_DAC_READ_SEARCH", "CAP_FOWNER", "CAP_FSETID",
	"CAP_KILL", "CAP_SETGID", "CAP_SETUID", "CAP_SETPCAP", "CAP_LINUX_IMMUTABLE",
	"CAP_NET_BIND_SERVICE", "CAP_NET_BROADCAST", "CAP_NET_ADMIN", "CAP_NET_RAW",
	"CAP_IPC_LOCK", "CAP_IPC_OWNER", "CAP_SYS_MODULE", "CAP_SYS_RAWIO", "CAP_SYS_CHROOT",
	"CAP_SYS_PTRACE", "CAP_SYS_PACCT", "CAP_SYS_ADMIN", "CAP_SYS_BOOT", "CAP_SYS_NICE",
	"CAP_SYS_RESOURCE", "CAP_SYS_TIME", "CAP_SYS_TTY_CONFIG", "CAP_MKNOD", "CAP_LEASE",
	"CAP_AUDIT_WRITE", "CAP_AUDIT_CONTROL", "CAP_SETFCAP", "CAP_MAC_OVERRIDE", "CAP_MAC_ADMIN",
	"CAP_SYSLOG", "CAP_WAKE_ALARM", "CAP_BLOCK_SUSPEND", "CAP_AUDIT_READ",
}