github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/vm/qemu/snapshot_linux.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package qemu 5 6 import ( 7 "encoding/binary" 8 "fmt" 9 "net" 10 "path/filepath" 11 "sync/atomic" 12 "syscall" 13 "time" 14 "unsafe" 15 16 "github.com/google/syzkaller/pkg/flatrpc" 17 "golang.org/x/sys/unix" 18 ) 19 20 type snapshot struct { 21 ivsListener *net.UnixListener 22 ivsConn *net.UnixConn 23 doorbellFD int 24 eventFD int 25 shmemFD int 26 shmem []byte 27 input []byte 28 header *flatrpc.SnapshotHeaderT 29 } 30 31 func (inst *instance) snapshotClose() { 32 if inst.ivsListener != nil { 33 inst.ivsListener.Close() 34 } 35 if inst.ivsConn != nil { 36 inst.ivsConn.Close() 37 } 38 if inst.doorbellFD != 0 { 39 syscall.Close(inst.doorbellFD) 40 } 41 if inst.eventFD != 0 { 42 syscall.Close(inst.eventFD) 43 } 44 if inst.shmemFD != 0 { 45 syscall.Close(inst.shmemFD) 46 } 47 if inst.shmem != nil { 48 syscall.Munmap(inst.shmem) 49 } 50 } 51 52 func (inst *instance) snapshotEnable() ([]string, error) { 53 // We use ivshmem device (Inter-VM Shared Memory) for communication with the VM, 54 // it allows to have a shared memory region directly accessible by both host and target: 55 // https://www.qemu.org/docs/master/system/devices/ivshmem.html 56 // 57 // The shared memory region is not restored as part of snapshot restore since we set: 58 // migrate_set_capability x-ignore-shared on 59 // This allows to write a new input into ivshmem before each restore. 60 // 61 // We also use doorbell (interrupt) capability of ivshmem to notify host about 62 // program execution completion. Doorbell also allows to send interrupts in the other direction 63 // (from host to target), but we don't need/use this since we arrange things such that 64 // snapshot restore serves as a signal to execute new input. 65 // 66 // Ideally we use a single ivshmem device for both purposes (shmem+doorbell). 67 // But unfortunately it seems that the doorbell device is always restored on snapshot restore 68 // (at least I did not find a way to make it not restored, maybe can be solved with qemu change). 69 // So we use 2 separate devices for these purposes. 70 shmemFD, err := unix.MemfdCreate("syz-qemu-shmem", 0) 71 if err != nil { 72 return nil, fmt.Errorf("qemu: memfd_create failed: %w", err) 73 } 74 inst.shmemFD = shmemFD 75 if err := syscall.Ftruncate(shmemFD, int64(flatrpc.ConstSnapshotShmemSize)); err != nil { 76 return nil, fmt.Errorf("qemu: ftruncate failed: %w", err) 77 } 78 shmem, err := syscall.Mmap(shmemFD, 0, int(flatrpc.ConstSnapshotShmemSize), 79 syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) 80 if err != nil { 81 return nil, fmt.Errorf("qemu: shmem mmap failed: %w", err) 82 } 83 inst.shmem = shmem 84 inst.input = shmem[:flatrpc.ConstMaxInputSize:flatrpc.ConstMaxInputSize] 85 inst.header = (*flatrpc.SnapshotHeaderT)(unsafe.Pointer(&shmem[flatrpc.ConstMaxInputSize])) 86 shmemFile := fmt.Sprintf("/proc/%v/fd/%v", syscall.Getpid(), shmemFD) 87 88 doorbellFD, err := unix.MemfdCreate("syz-qemu-doorbell", 0) 89 if err != nil { 90 return nil, fmt.Errorf("qemu: memfd_create failed: %w", err) 91 } 92 if err := syscall.Ftruncate(doorbellFD, int64(flatrpc.ConstSnapshotDoorbellSize)); err != nil { 93 return nil, fmt.Errorf("qemu: ftruncate failed: %w", err) 94 } 95 inst.doorbellFD = doorbellFD 96 97 eventFD, err := unix.Eventfd(0, unix.EFD_SEMAPHORE) 98 if err != nil { 99 return nil, fmt.Errorf("qemu: eventfd failed: %w", err) 100 } 101 inst.eventFD = eventFD 102 103 sockPath := filepath.Join(inst.workdir, "ivs.sock") 104 ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockPath, Net: "unix"}) 105 if err != nil { 106 return nil, fmt.Errorf("qemu: unix listen on %v failed: %w", sockPath, err) 107 } 108 inst.ivsListener = ln 109 110 return []string{ 111 // migratable=on is required to take snapshots. 112 // tsc=off disables RDTSC timestamp counter, it's not virtualized/restored as part of snapshots, 113 // so the target kernel sees a large jump in time and always declares TSC as unstable after restore. 114 "-cpu", "host,migratable=on,tsc=off", 115 "-chardev", fmt.Sprintf("socket,path=%v,id=snapshot-doorbell", sockPath), 116 "-device", "ivshmem-doorbell,master=on,vectors=1,chardev=snapshot-doorbell", 117 "-device", "ivshmem-plain,master=on,memdev=snapshot-shmem", 118 "-object", fmt.Sprintf("memory-backend-file,size=%v,share=on,discard-data=on,id=snapshot-shmem,mem-path=%v", 119 uint64(flatrpc.ConstSnapshotShmemSize), shmemFile), 120 }, nil 121 } 122 123 func (inst *instance) snapshotHandshake() error { 124 // ivshmem-doorbell expects an external server that communicates via a unix socket. 125 // The protocol is not documented, for details see: 126 // https://github.com/qemu/qemu/blob/master/hw/misc/ivshmem.c 127 // https://github.com/qemu/qemu/blob/master/contrib/ivshmem-server/ivshmem-server.c 128 conn, err := inst.ivsListener.AcceptUnix() 129 if err != nil { 130 return fmt.Errorf("qemu: unix accept failed: %w", err) 131 } 132 inst.ivsListener.Close() 133 inst.ivsListener = nil 134 inst.ivsConn = conn 135 136 msg := make([]byte, 8) 137 // Send protocol version 0. 138 binary.LittleEndian.PutUint64(msg, 0) 139 if _, err := conn.Write(msg); err != nil { 140 return fmt.Errorf("qemu: ivs conn write failed: %w", err) 141 } 142 // Send VM id 0. 143 binary.LittleEndian.PutUint64(msg, 0) 144 if _, err := conn.Write(msg); err != nil { 145 return fmt.Errorf("qemu: ivs conn write failed: %w", err) 146 } 147 // Send shared memory file FD. 148 binary.LittleEndian.PutUint64(msg, ^uint64(0)) 149 rights := syscall.UnixRights(inst.doorbellFD) 150 if _, _, err := conn.WriteMsgUnix(msg, rights, nil); err != nil { 151 return fmt.Errorf("qemu: ivs conn sendmsg failed: %w", err) 152 } 153 // Send event FD for VM 1 interrupt vector 0. 154 binary.LittleEndian.PutUint64(msg, 1) 155 rights = syscall.UnixRights(inst.eventFD) 156 if _, _, err := conn.WriteMsgUnix(msg, rights, nil); err != nil { 157 return fmt.Errorf("qemu: ivs conn sendmsg failed: %w", err) 158 } 159 return nil 160 } 161 162 func (inst *instance) SetupSnapshot(input []byte) error { 163 copy(inst.input, input) 164 // Tell executor that we are ready to snapshot and wait for an ack. 165 inst.header.UpdateState(flatrpc.SnapshotStateHandshake) 166 if !inst.waitSnapshotStateChange(flatrpc.SnapshotStateHandshake, 10*time.Minute) { 167 return fmt.Errorf("executor does not start snapshot handshake\n%s", inst.readOutput()) 168 } 169 if _, err := inst.hmp("migrate_set_capability x-ignore-shared on", 0); err != nil { 170 return err 171 } 172 if _, err := inst.hmp("savevm syz", 0); err != nil { 173 return err 174 } 175 if inst.debug { 176 inst.hmp("info snapshots", 0) // this prints size of the snapshot 177 } 178 inst.header.UpdateState(flatrpc.SnapshotStateSnapshotted) 179 if !inst.waitSnapshotStateChange(flatrpc.SnapshotStateSnapshotted, time.Minute) { 180 return fmt.Errorf("executor has not confirmed snapshot handshake\n%s", inst.readOutput()) 181 } 182 return nil 183 } 184 185 func (inst *instance) RunSnapshot(timeout time.Duration, input []byte) (result, output []byte, err error) { 186 copy(inst.input, input) 187 inst.header.OutputOffset = 0 188 inst.header.OutputSize = 0 189 inst.header.UpdateState(flatrpc.SnapshotStateExecute) 190 if _, err := inst.hmp("loadvm syz", 0); err != nil { 191 return nil, nil, fmt.Errorf("%w\n%s", err, inst.readOutput()) 192 } 193 inst.waitSnapshotStateChange(flatrpc.SnapshotStateExecute, timeout) 194 resStart := int(flatrpc.ConstMaxInputSize) + int(atomic.LoadUint32(&inst.header.OutputOffset)) 195 resEnd := resStart + int(atomic.LoadUint32(&inst.header.OutputSize)) 196 var res []byte 197 if resEnd <= len(inst.shmem) { 198 res = inst.shmem[resStart:resEnd:resEnd] 199 } 200 output = inst.readOutput() 201 return res, output, nil 202 } 203 204 func (inst *instance) waitSnapshotStateChange(state flatrpc.SnapshotState, timeout time.Duration) bool { 205 deadline := time.Now().Add(timeout) 206 timeoutMs := int(timeout / time.Millisecond) 207 fds := []unix.PollFd{{ 208 Fd: int32(inst.eventFD), 209 Events: unix.POLLIN, 210 }} 211 for { 212 if n, _ := unix.Poll(fds, timeoutMs); n == 1 { 213 var buf [8]byte 214 syscall.Read(inst.eventFD, buf[:]) 215 } 216 if inst.header.LoadState() != state { 217 return true 218 } 219 remain := time.Until(deadline) 220 if remain < time.Millisecond { 221 return false 222 } 223 timeoutMs = int(remain / time.Millisecond) 224 } 225 } 226 227 func (inst *instance) readOutput() []byte { 228 var output []byte 229 // If output channel has overflown, then wait for more output from the merger goroutine. 230 wait := cap(inst.merger.Output) 231 for { 232 select { 233 case out := <-inst.merger.Output: 234 output = append(output, out...) 235 wait-- 236 default: 237 if wait > 0 { 238 return output 239 } 240 // After the first overflow we wait after every read because the goroutine 241 // may be running and sending more output to the channel concurrently. 242 wait = 1 243 time.Sleep(10 * time.Millisecond) 244 } 245 } 246 }