github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/vm/qemu/snapshot_linux.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package qemu
     5  
     6  import (
     7  	"encoding/binary"
     8  	"fmt"
     9  	"net"
    10  	"path/filepath"
    11  	"sync/atomic"
    12  	"syscall"
    13  	"time"
    14  	"unsafe"
    15  
    16  	"github.com/google/syzkaller/pkg/flatrpc"
    17  	"golang.org/x/sys/unix"
    18  )
    19  
    20  type snapshot struct {
    21  	ivsListener *net.UnixListener
    22  	ivsConn     *net.UnixConn
    23  	doorbellFD  int
    24  	eventFD     int
    25  	shmemFD     int
    26  	shmem       []byte
    27  	input       []byte
    28  	header      *flatrpc.SnapshotHeaderT
    29  }
    30  
    31  func (inst *instance) snapshotClose() {
    32  	if inst.ivsListener != nil {
    33  		inst.ivsListener.Close()
    34  	}
    35  	if inst.ivsConn != nil {
    36  		inst.ivsConn.Close()
    37  	}
    38  	if inst.doorbellFD != 0 {
    39  		syscall.Close(inst.doorbellFD)
    40  	}
    41  	if inst.eventFD != 0 {
    42  		syscall.Close(inst.eventFD)
    43  	}
    44  	if inst.shmemFD != 0 {
    45  		syscall.Close(inst.shmemFD)
    46  	}
    47  	if inst.shmem != nil {
    48  		syscall.Munmap(inst.shmem)
    49  	}
    50  }
    51  
    52  func (inst *instance) snapshotEnable() ([]string, error) {
    53  	// We use ivshmem device (Inter-VM Shared Memory) for communication with the VM,
    54  	// it allows to have a shared memory region directly accessible by both host and target:
    55  	// https://www.qemu.org/docs/master/system/devices/ivshmem.html
    56  	//
    57  	// The shared memory region is not restored as part of snapshot restore since we set:
    58  	//	migrate_set_capability x-ignore-shared on
    59  	// This allows to write a new input into ivshmem before each restore.
    60  	//
    61  	// We also use doorbell (interrupt) capability of ivshmem to notify host about
    62  	// program execution completion. Doorbell also allows to send interrupts in the other direction
    63  	// (from host to target), but we don't need/use this since we arrange things such that
    64  	// snapshot restore serves as a signal to execute new input.
    65  	//
    66  	// Ideally we use a single ivshmem device for both purposes (shmem+doorbell).
    67  	// But unfortunately it seems that the doorbell device is always restored on snapshot restore
    68  	// (at least I did not find a way to make it not restored, maybe can be solved with qemu change).
    69  	// So we use 2 separate devices for these purposes.
    70  	shmemFD, err := unix.MemfdCreate("syz-qemu-shmem", 0)
    71  	if err != nil {
    72  		return nil, fmt.Errorf("qemu: memfd_create failed: %w", err)
    73  	}
    74  	inst.shmemFD = shmemFD
    75  	if err := syscall.Ftruncate(shmemFD, int64(flatrpc.ConstSnapshotShmemSize)); err != nil {
    76  		return nil, fmt.Errorf("qemu: ftruncate failed: %w", err)
    77  	}
    78  	shmem, err := syscall.Mmap(shmemFD, 0, int(flatrpc.ConstSnapshotShmemSize),
    79  		syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
    80  	if err != nil {
    81  		return nil, fmt.Errorf("qemu: shmem mmap failed: %w", err)
    82  	}
    83  	inst.shmem = shmem
    84  	inst.input = shmem[:flatrpc.ConstMaxInputSize:flatrpc.ConstMaxInputSize]
    85  	inst.header = (*flatrpc.SnapshotHeaderT)(unsafe.Pointer(&shmem[flatrpc.ConstMaxInputSize]))
    86  	shmemFile := fmt.Sprintf("/proc/%v/fd/%v", syscall.Getpid(), shmemFD)
    87  
    88  	doorbellFD, err := unix.MemfdCreate("syz-qemu-doorbell", 0)
    89  	if err != nil {
    90  		return nil, fmt.Errorf("qemu: memfd_create failed: %w", err)
    91  	}
    92  	if err := syscall.Ftruncate(doorbellFD, int64(flatrpc.ConstSnapshotDoorbellSize)); err != nil {
    93  		return nil, fmt.Errorf("qemu: ftruncate failed: %w", err)
    94  	}
    95  	inst.doorbellFD = doorbellFD
    96  
    97  	eventFD, err := unix.Eventfd(0, unix.EFD_SEMAPHORE)
    98  	if err != nil {
    99  		return nil, fmt.Errorf("qemu: eventfd failed: %w", err)
   100  	}
   101  	inst.eventFD = eventFD
   102  
   103  	sockPath := filepath.Join(inst.workdir, "ivs.sock")
   104  	ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockPath, Net: "unix"})
   105  	if err != nil {
   106  		return nil, fmt.Errorf("qemu: unix listen on %v failed: %w", sockPath, err)
   107  	}
   108  	inst.ivsListener = ln
   109  
   110  	return []string{
   111  		// migratable=on is required to take snapshots.
   112  		// tsc=off disables RDTSC timestamp counter, it's not virtualized/restored as part of snapshots,
   113  		// so the target kernel sees a large jump in time and always declares TSC as unstable after restore.
   114  		"-cpu", "host,migratable=on,tsc=off",
   115  		"-chardev", fmt.Sprintf("socket,path=%v,id=snapshot-doorbell", sockPath),
   116  		"-device", "ivshmem-doorbell,master=on,vectors=1,chardev=snapshot-doorbell",
   117  		"-device", "ivshmem-plain,master=on,memdev=snapshot-shmem",
   118  		"-object", fmt.Sprintf("memory-backend-file,size=%v,share=on,discard-data=on,id=snapshot-shmem,mem-path=%v",
   119  			uint64(flatrpc.ConstSnapshotShmemSize), shmemFile),
   120  	}, nil
   121  }
   122  
   123  func (inst *instance) snapshotHandshake() error {
   124  	// ivshmem-doorbell expects an external server that communicates via a unix socket.
   125  	// The protocol is not documented, for details see:
   126  	// https://github.com/qemu/qemu/blob/master/hw/misc/ivshmem.c
   127  	// https://github.com/qemu/qemu/blob/master/contrib/ivshmem-server/ivshmem-server.c
   128  	conn, err := inst.ivsListener.AcceptUnix()
   129  	if err != nil {
   130  		return fmt.Errorf("qemu: unix accept failed: %w", err)
   131  	}
   132  	inst.ivsListener.Close()
   133  	inst.ivsListener = nil
   134  	inst.ivsConn = conn
   135  
   136  	msg := make([]byte, 8)
   137  	// Send protocol version 0.
   138  	binary.LittleEndian.PutUint64(msg, 0)
   139  	if _, err := conn.Write(msg); err != nil {
   140  		return fmt.Errorf("qemu: ivs conn write failed: %w", err)
   141  	}
   142  	// Send VM id 0.
   143  	binary.LittleEndian.PutUint64(msg, 0)
   144  	if _, err := conn.Write(msg); err != nil {
   145  		return fmt.Errorf("qemu: ivs conn write failed: %w", err)
   146  	}
   147  	// Send shared memory file FD.
   148  	binary.LittleEndian.PutUint64(msg, ^uint64(0))
   149  	rights := syscall.UnixRights(inst.doorbellFD)
   150  	if _, _, err := conn.WriteMsgUnix(msg, rights, nil); err != nil {
   151  		return fmt.Errorf("qemu: ivs conn sendmsg failed: %w", err)
   152  	}
   153  	// Send event FD for VM 1 interrupt vector 0.
   154  	binary.LittleEndian.PutUint64(msg, 1)
   155  	rights = syscall.UnixRights(inst.eventFD)
   156  	if _, _, err := conn.WriteMsgUnix(msg, rights, nil); err != nil {
   157  		return fmt.Errorf("qemu: ivs conn sendmsg failed: %w", err)
   158  	}
   159  	return nil
   160  }
   161  
   162  func (inst *instance) SetupSnapshot(input []byte) error {
   163  	copy(inst.input, input)
   164  	// Tell executor that we are ready to snapshot and wait for an ack.
   165  	inst.header.UpdateState(flatrpc.SnapshotStateHandshake)
   166  	if !inst.waitSnapshotStateChange(flatrpc.SnapshotStateHandshake, 10*time.Minute) {
   167  		return fmt.Errorf("executor does not start snapshot handshake\n%s", inst.readOutput())
   168  	}
   169  	if _, err := inst.hmp("migrate_set_capability x-ignore-shared on", 0); err != nil {
   170  		return err
   171  	}
   172  	if _, err := inst.hmp("savevm syz", 0); err != nil {
   173  		return err
   174  	}
   175  	if inst.debug {
   176  		inst.hmp("info snapshots", 0) // this prints size of the snapshot
   177  	}
   178  	inst.header.UpdateState(flatrpc.SnapshotStateSnapshotted)
   179  	if !inst.waitSnapshotStateChange(flatrpc.SnapshotStateSnapshotted, time.Minute) {
   180  		return fmt.Errorf("executor has not confirmed snapshot handshake\n%s", inst.readOutput())
   181  	}
   182  	return nil
   183  }
   184  
   185  func (inst *instance) RunSnapshot(timeout time.Duration, input []byte) (result, output []byte, err error) {
   186  	copy(inst.input, input)
   187  	inst.header.OutputOffset = 0
   188  	inst.header.OutputSize = 0
   189  	inst.header.UpdateState(flatrpc.SnapshotStateExecute)
   190  	if _, err := inst.hmp("loadvm syz", 0); err != nil {
   191  		return nil, nil, fmt.Errorf("%w\n%s", err, inst.readOutput())
   192  	}
   193  	inst.waitSnapshotStateChange(flatrpc.SnapshotStateExecute, timeout)
   194  	resStart := int(flatrpc.ConstMaxInputSize) + int(atomic.LoadUint32(&inst.header.OutputOffset))
   195  	resEnd := resStart + int(atomic.LoadUint32(&inst.header.OutputSize))
   196  	var res []byte
   197  	if resEnd <= len(inst.shmem) {
   198  		res = inst.shmem[resStart:resEnd:resEnd]
   199  	}
   200  	output = inst.readOutput()
   201  	return res, output, nil
   202  }
   203  
   204  func (inst *instance) waitSnapshotStateChange(state flatrpc.SnapshotState, timeout time.Duration) bool {
   205  	deadline := time.Now().Add(timeout)
   206  	timeoutMs := int(timeout / time.Millisecond)
   207  	fds := []unix.PollFd{{
   208  		Fd:     int32(inst.eventFD),
   209  		Events: unix.POLLIN,
   210  	}}
   211  	for {
   212  		if n, _ := unix.Poll(fds, timeoutMs); n == 1 {
   213  			var buf [8]byte
   214  			syscall.Read(inst.eventFD, buf[:])
   215  		}
   216  		if inst.header.LoadState() != state {
   217  			return true
   218  		}
   219  		remain := time.Until(deadline)
   220  		if remain < time.Millisecond {
   221  			return false
   222  		}
   223  		timeoutMs = int(remain / time.Millisecond)
   224  	}
   225  }
   226  
   227  func (inst *instance) readOutput() []byte {
   228  	var output []byte
   229  	// If output channel has overflown, then wait for more output from the merger goroutine.
   230  	wait := cap(inst.merger.Output)
   231  	for {
   232  		select {
   233  		case out := <-inst.merger.Output:
   234  			output = append(output, out...)
   235  			wait--
   236  		default:
   237  			if wait > 0 {
   238  				return output
   239  			}
   240  			// After the first overflow we wait after every read because the goroutine
   241  			// may be running and sending more output to the channel concurrently.
   242  			wait = 1
   243  			time.Sleep(10 * time.Millisecond)
   244  		}
   245  	}
   246  }