github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/common_kvm_ppc64.h (about) 1 // Copyright 2020 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 #ifndef EXECUTOR_COMMON_KVM_PPC64_H 5 #define EXECUTOR_COMMON_KVM_PPC64_H 6 7 // This file is shared between executor and csource package. 8 9 // Implementation of syz_kvm_setup_cpu pseudo-syscall. 10 11 #include "kvm_ppc64le.S.h" 12 13 #define BOOK3S_INTERRUPT_SYSTEM_RESET 0x100 14 #define BOOK3S_INTERRUPT_MACHINE_CHECK 0x200 15 #define BOOK3S_INTERRUPT_DATA_STORAGE 0x300 16 #define BOOK3S_INTERRUPT_DATA_SEGMENT 0x380 17 #define BOOK3S_INTERRUPT_INST_STORAGE 0x400 18 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 19 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 20 #define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 21 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 22 #define BOOK3S_INTERRUPT_PROGRAM 0x700 23 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 24 #define BOOK3S_INTERRUPT_DECREMENTER 0x900 25 #define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980 26 #define BOOK3S_INTERRUPT_DOORBELL 0xa00 27 #define BOOK3S_INTERRUPT_SYSCALL 0xc00 28 #define BOOK3S_INTERRUPT_TRACE 0xd00 29 #define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00 30 #define BOOK3S_INTERRUPT_H_INST_STORAGE 0xe20 31 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 32 #define BOOK3S_INTERRUPT_HMI 0xe60 33 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 34 #define BOOK3S_INTERRUPT_H_VIRT 0xea0 35 #define BOOK3S_INTERRUPT_PERFMON 0xf00 36 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 37 #define BOOK3S_INTERRUPT_VSX 0xf40 38 #define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60 39 #define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80 40 41 #define BITS_PER_LONG 64 42 #define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be)) 43 #define PPC_BIT(bit) (1ULL << PPC_BITLSHIFT(bit)) 44 #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) 45 46 #define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB 47 #define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB 48 #define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB 49 #define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB 50 51 #define cpu_to_be32(x) __builtin_bswap32(x) 52 #define cpu_to_be64(x) __builtin_bswap64(x) 53 #define be64_to_cpu(x) __builtin_bswap64(x) 54 55 #define LPCR_ILE PPC_BIT(38) 56 #define LPCR_UPRT PPC_BIT(41) // Use Process Table 57 #define LPCR_EVIRT PPC_BIT(42) // Enhanced Virtualisation 58 #define LPCR_HR PPC_BIT(43) // Host Radix 59 #ifndef KVM_REG_PPC_LPCR_64 60 #define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5) 61 #endif 62 63 #define PRTB_SIZE_SHIFT 12 // log2((64 << 10) / 16) 64 #define PATB_GR (1UL << 63) // guest uses radix; must match HR 65 #define PATB_HR (1UL << 63) 66 #define PRTB_MASK 0x0ffffffffffff000UL 67 68 #define ALIGNUP(p, q) ((void*)(((unsigned long)(p) + (q) - 1) & ~((q) - 1))) 69 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 70 71 #ifndef KVM_REG_PPC_DEC_EXPIRY 72 #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) 73 #endif 74 75 #ifndef KVM_PPC_CONFIGURE_V3_MMU 76 // Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 77 #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) 78 79 // Flag values for KVM_PPC_CONFIGURE_V3_MMU 80 #define KVM_PPC_MMUV3_RADIX 1 // 1 = radix mode, 0 = HPT 81 #define KVM_PPC_MMUV3_GTSE 2 // global translation shootdown enb 82 #endif 83 84 #ifndef KVM_CAP_PPC_NESTED_HV 85 #define KVM_CAP_PPC_NESTED_HV 160 86 #endif 87 88 struct kvm_text { 89 uintptr_t typ; 90 const void* text; 91 uintptr_t size; 92 }; 93 94 static int kvmppc_define_rtas_kernel_token(int vmfd, unsigned token, const char* func) 95 { 96 struct kvm_rtas_token_args args; 97 98 args.token = token; 99 strncpy(args.name, func, sizeof(args.name) - 1); 100 101 return ioctl(vmfd, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 102 } 103 104 static int kvmppc_get_one_reg(int cpufd, uint64 id, void* target) 105 { 106 struct kvm_one_reg reg = {.id = id, .addr = (uintptr_t)target}; 107 108 return ioctl(cpufd, KVM_GET_ONE_REG, ®); 109 } 110 111 static int kvmppc_set_one_reg(int cpufd, uint64 id, void* target) 112 { 113 struct kvm_one_reg reg = {.id = id, .addr = (uintptr_t)target}; 114 115 return ioctl(cpufd, KVM_SET_ONE_REG, ®); 116 } 117 118 static int kvm_vcpu_enable_cap(int cpufd, uint32 capability) 119 { 120 struct kvm_enable_cap cap = { 121 .cap = capability, 122 }; 123 return ioctl(cpufd, KVM_ENABLE_CAP, &cap); 124 } 125 126 static int kvm_vm_enable_cap(int vmfd, uint32 capability, uint64 p1, uint64 p2) 127 { 128 struct kvm_enable_cap cap = { 129 .cap = capability, 130 .flags = 0, 131 .args = {p1, p2}, 132 }; 133 return ioctl(vmfd, KVM_ENABLE_CAP, &cap); 134 } 135 136 static void dump_text(const char* mem, unsigned start, unsigned cw, uint32 debug_inst_opcode) 137 { 138 #ifdef DEBUG 139 printf("Text @%x: ", start); 140 141 for (unsigned i = 0; i < cw; ++i) { 142 uint32 w = ((uint32*)(mem + start))[i]; 143 144 printf(" %08x", w); 145 if (debug_inst_opcode && debug_inst_opcode == w) 146 break; 147 } 148 149 printf("\n"); 150 #endif 151 } 152 153 // Flags 154 #define KVM_SETUP_PPC64_LE (1 << 0) // Little endian 155 #define KVM_SETUP_PPC64_IR (1 << 1) // Paging for instructions 156 #define KVM_SETUP_PPC64_DR (1 << 2) // Paging for data 157 #define KVM_SETUP_PPC64_PR (1 << 3) // Run with MSR_PR (==usermode) 158 #define KVM_SETUP_PPC64_PID1 (1 << 4) // Set PID=1 i.e. not kernel's PID 159 160 // syz_kvm_setup_cpu(fd fd_kvmvm, cpufd fd_kvmcpu, usermem vma[24], text ptr[in, array[kvm_text, 1]], ntext len[text], flags flags[kvm_setup_flags_ppc64], opts ptr[in, array[kvm_setup_opt, 0:2]], nopt len[opts]) 161 static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7) 162 { 163 const int vmfd = a0; 164 const int cpufd = a1; 165 char* const host_mem = (char*)a2; 166 const struct kvm_text* const text_array_ptr = (struct kvm_text*)a3; 167 const uintptr_t text_count = a4; 168 uintptr_t flags = a5; 169 const uintptr_t page_size = 0x10000; // SYZ_PAGE_SIZE 170 const uintptr_t guest_mem_size = 24 * page_size; // vma[24] from dev_kvm.txt 171 unsigned long gpa_off = 0; 172 uint32 debug_inst_opcode = 0; 173 174 (void)text_count; // fuzzer can spoof count and we need just 1 text, so ignore text_count 175 const void* text = 0; 176 uintptr_t text_size = 0; 177 uint64 pid = 0; 178 uint64 lpcr = 0; 179 NONFAILING(text = text_array_ptr[0].text); 180 NONFAILING(text_size = text_array_ptr[0].size); 181 182 if (kvm_vcpu_enable_cap(cpufd, KVM_CAP_PPC_PAPR)) 183 return -1; 184 185 if (kvm_vm_enable_cap(vmfd, KVM_CAP_PPC_NESTED_HV, 1, 0)) 186 return -1; 187 188 for (uintptr_t i = 0; i < guest_mem_size / page_size; i++) { 189 struct kvm_userspace_memory_region memreg; 190 memreg.slot = i; 191 memreg.flags = 0; // can be KVM_MEM_LOG_DIRTY_PAGES but not KVM_MEM_READONLY 192 memreg.guest_phys_addr = i * page_size; 193 memreg.memory_size = page_size; 194 memreg.userspace_addr = (uintptr_t)host_mem + i * page_size; 195 if (ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg)) 196 return -1; 197 } 198 199 struct kvm_regs regs; 200 struct kvm_sregs sregs; 201 if (ioctl(cpufd, KVM_GET_SREGS, &sregs)) 202 return -1; 203 if (ioctl(cpufd, KVM_GET_REGS, ®s)) 204 return -1; 205 206 regs.msr = PPC_BIT(0); // MSR_SF == Sixty Four == 64bit 207 if (flags & KVM_SETUP_PPC64_LE) 208 regs.msr |= PPC_BIT(63); // Little endian 209 210 // PR == "problem state" == non priveledged == userspace 211 if (flags & KVM_SETUP_PPC64_PR) { 212 regs.msr |= PPC_BIT(49); 213 // When PR=1, the hardware enforces IR and DR as well. 214 flags |= KVM_SETUP_PPC64_IR | KVM_SETUP_PPC64_DR | KVM_SETUP_PPC64_PID1; 215 } 216 217 if (flags & KVM_SETUP_PPC64_IR) 218 regs.msr |= PPC_BIT(58); // IR - MMU=on for instructions 219 if (flags & KVM_SETUP_PPC64_DR) 220 regs.msr |= PPC_BIT(59); // DR - MMU=on for data 221 if (flags & KVM_SETUP_PPC64_PID1) 222 pid = 1; 223 224 // KVM HV on POWER is hard to force to exit, it will bounce between 225 // the fault handlers in KVM and the VM. Forcing all exception 226 // vectors to do software debug breakpoint ensures the exit from KVM. 227 if (kvmppc_get_one_reg(cpufd, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode)) 228 return -1; 229 230 #define VEC(x) (*((uint32*)(host_mem + (x)))) 231 VEC(BOOK3S_INTERRUPT_SYSTEM_RESET) = debug_inst_opcode; 232 VEC(BOOK3S_INTERRUPT_MACHINE_CHECK) = debug_inst_opcode; 233 VEC(BOOK3S_INTERRUPT_DATA_STORAGE) = debug_inst_opcode; 234 VEC(BOOK3S_INTERRUPT_DATA_SEGMENT) = debug_inst_opcode; 235 VEC(BOOK3S_INTERRUPT_INST_STORAGE) = debug_inst_opcode; 236 VEC(BOOK3S_INTERRUPT_INST_SEGMENT) = debug_inst_opcode; 237 VEC(BOOK3S_INTERRUPT_EXTERNAL) = debug_inst_opcode; 238 VEC(BOOK3S_INTERRUPT_EXTERNAL_HV) = debug_inst_opcode; 239 VEC(BOOK3S_INTERRUPT_ALIGNMENT) = debug_inst_opcode; 240 VEC(BOOK3S_INTERRUPT_PROGRAM) = debug_inst_opcode; 241 VEC(BOOK3S_INTERRUPT_FP_UNAVAIL) = debug_inst_opcode; 242 memcpy(host_mem + BOOK3S_INTERRUPT_DECREMENTER, kvm_ppc64_recharge_dec, sizeof(kvm_ppc64_recharge_dec) - 1); 243 VEC(BOOK3S_INTERRUPT_DECREMENTER + sizeof(kvm_ppc64_recharge_dec) - 1) = debug_inst_opcode; 244 VEC(BOOK3S_INTERRUPT_HV_DECREMENTER) = debug_inst_opcode; 245 VEC(BOOK3S_INTERRUPT_DOORBELL) = debug_inst_opcode; 246 VEC(BOOK3S_INTERRUPT_SYSCALL) = debug_inst_opcode; 247 VEC(BOOK3S_INTERRUPT_TRACE) = debug_inst_opcode; 248 VEC(BOOK3S_INTERRUPT_H_DATA_STORAGE) = debug_inst_opcode; 249 VEC(BOOK3S_INTERRUPT_H_INST_STORAGE) = debug_inst_opcode; 250 VEC(BOOK3S_INTERRUPT_H_EMUL_ASSIST) = debug_inst_opcode; 251 VEC(BOOK3S_INTERRUPT_HMI) = debug_inst_opcode; 252 VEC(BOOK3S_INTERRUPT_H_DOORBELL) = debug_inst_opcode; 253 VEC(BOOK3S_INTERRUPT_H_VIRT) = debug_inst_opcode; 254 VEC(BOOK3S_INTERRUPT_PERFMON) = debug_inst_opcode; 255 VEC(BOOK3S_INTERRUPT_ALTIVEC) = debug_inst_opcode; 256 VEC(BOOK3S_INTERRUPT_VSX) = debug_inst_opcode; 257 VEC(BOOK3S_INTERRUPT_FAC_UNAVAIL) = debug_inst_opcode; 258 VEC(BOOK3S_INTERRUPT_H_FAC_UNAVAIL) = debug_inst_opcode; 259 260 struct kvm_guest_debug dbg = {0}; 261 dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 262 263 if (ioctl(cpufd, KVM_SET_GUEST_DEBUG, &dbg)) 264 return -1; 265 266 // Exception vector occupy 128K, including "System Call Vectored" 267 gpa_off = 128 << 10; 268 269 // Set up a radix page table, the hash mode is not supported 270 if (flags & (KVM_SETUP_PPC64_IR | KVM_SETUP_PPC64_DR)) { 271 uintptr_t process_tb_off = gpa_off; 272 unsigned long process_tb_size = 1UL << (PRTB_SIZE_SHIFT + 4); 273 struct prtb_entry { 274 __be64 prtb0; 275 __be64 prtb1; 276 }* process_tb = (struct prtb_entry*)(host_mem + gpa_off); 277 278 memset(process_tb, 0xcc, process_tb_size); 279 280 // PRTB_SIZE_SHIFT is defined to use 64K for the process table 281 gpa_off += process_tb_size; 282 283 unsigned long *pgd, *pud, *pmd, *pte, i; 284 285 // Create 4 level page table, just like Linux does for PAGE_SIZE==64K, 286 // put each level to a separate page including the last level which won't 287 // need more than as we only allocate 24 pages for the entire VM. 288 uintptr_t pgd_off = gpa_off; 289 pgd = (unsigned long*)(host_mem + pgd_off); 290 gpa_off += page_size; 291 uintptr_t pud_off = gpa_off; 292 pud = (unsigned long*)(host_mem + pud_off); 293 gpa_off += page_size; 294 uintptr_t pmd_off = gpa_off; 295 pmd = (unsigned long*)(host_mem + pmd_off); 296 gpa_off += page_size; 297 uintptr_t pte_off = gpa_off; 298 pte = (unsigned long*)(host_mem + pte_off); 299 gpa_off += page_size; 300 301 memset(pgd, 0, page_size); 302 memset(pud, 0, page_size); 303 memset(pmd, 0, page_size); 304 memset(pte, 0, page_size); 305 pgd[0] = cpu_to_be64(PPC_BIT(0) | // Valid 306 (pud_off & PPC_BITMASK(4, 55)) | 307 RADIX_PUD_INDEX_SIZE); 308 pud[0] = cpu_to_be64(PPC_BIT(0) | // Valid 309 (pmd_off & PPC_BITMASK(4, 55)) | 310 RADIX_PMD_INDEX_SIZE); 311 pmd[0] = cpu_to_be64(PPC_BIT(0) | // Valid 312 (pte_off & PPC_BITMASK(4, 55)) | 313 RADIX_PTE_INDEX_SIZE); 314 315 // Map all 24 pages and allow write+execute for better coverage. 316 for (i = 0; i < 24; ++i) 317 pte[i] = cpu_to_be64(PPC_BIT(0) | // Valid 318 PPC_BIT(1) | // Leaf 319 ((i * page_size) & PPC_BITMASK(7, 51)) | 320 PPC_BIT(55) | // Reference 321 PPC_BIT(56) | // Change 322 PPC_BIT(61) | // Read permitted 323 PPC_BIT(62) | // Write permitted 324 PPC_BIT(63)); // Execute permitted 325 326 const long max_shift = 52; 327 const unsigned long rts = (max_shift - 31) & 0x1f; 328 const unsigned long rts1 = (rts >> 3) << PPC_BITLSHIFT(2); 329 const unsigned long rts2 = (rts & 7) << PPC_BITLSHIFT(58); 330 331 process_tb[0].prtb0 = cpu_to_be64(PATB_HR | rts1 | pgd_off | rts2 | RADIX_PGD_INDEX_SIZE); 332 if (pid) 333 process_tb[pid].prtb0 = cpu_to_be64(PATB_HR | rts1 | pgd_off | rts2 | RADIX_PGD_INDEX_SIZE); 334 335 // PATB_GR is not in the spec but KVM HV wants it for some reason 336 struct kvm_ppc_mmuv3_cfg cfg = { 337 .flags = KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE, 338 .process_table = (process_tb_off & PRTB_MASK) | (PRTB_SIZE_SHIFT - 12) | PATB_GR, 339 }; 340 if (ioctl(vmfd, KVM_PPC_CONFIGURE_V3_MMU, &cfg)) 341 return -1; 342 343 lpcr |= LPCR_UPRT | LPCR_HR; 344 #ifdef DEBUG 345 printf("MMUv3: flags=%lx %016lx\n", cfg.flags, cfg.process_table); 346 printf("PTRB0=%016lx PGD0=%016lx PUD0=%016lx PMD0=%016lx\n", 347 be64_to_cpu((unsigned long)process_tb[0].prtb0), be64_to_cpu((unsigned long)pgd[0]), 348 be64_to_cpu((unsigned long)pud[0]), be64_to_cpu((unsigned long)pmd[0])); 349 printf("PTEs @%lx:\n %016lx %016lx %016lx %016lx\n %016lx %016lx %016lx %016lx\n", 350 pte_off, 351 be64_to_cpu((unsigned long)pte[0]), be64_to_cpu((unsigned long)pte[1]), 352 be64_to_cpu((unsigned long)pte[2]), be64_to_cpu((unsigned long)pte[3]), 353 be64_to_cpu((unsigned long)pte[4]), be64_to_cpu((unsigned long)pte[5]), 354 be64_to_cpu((unsigned long)pte[6]), be64_to_cpu((unsigned long)pte[7])); 355 #endif 356 } 357 358 memcpy(host_mem + gpa_off, text, text_size); 359 regs.pc = gpa_off; 360 361 uintptr_t end_of_text = gpa_off + ((text_size + 3) & ~3); 362 memcpy(host_mem + end_of_text, &debug_inst_opcode, sizeof(debug_inst_opcode)); 363 364 // The code generator produces little endian instructions so swap bytes here 365 if (!(flags & KVM_SETUP_PPC64_LE)) { 366 uint32* p = (uint32*)(host_mem + gpa_off); 367 for (unsigned long i = 0; i < text_size / sizeof(*p); ++i) 368 p[i] = cpu_to_be32(p[i]); 369 370 p = (uint32*)(host_mem + BOOK3S_INTERRUPT_DECREMENTER); 371 for (unsigned long i = 0; i < sizeof(kvm_ppc64_recharge_dec) / sizeof(*p); ++i) 372 p[i] = cpu_to_be32(p[i]); 373 } else { 374 // PPC by default calls exception handlers in big endian unless ILE 375 lpcr |= LPCR_ILE; 376 } 377 378 if (ioctl(cpufd, KVM_SET_SREGS, &sregs)) 379 return -1; 380 if (ioctl(cpufd, KVM_SET_REGS, ®s)) 381 return -1; 382 if (kvmppc_set_one_reg(cpufd, KVM_REG_PPC_LPCR_64, &lpcr)) 383 return -1; 384 if (kvmppc_set_one_reg(cpufd, KVM_REG_PPC_PID, &pid)) 385 return -1; 386 387 // Hypercalls need to be enable so we enable them all here to 388 // allow fuzzing 389 #define MAX_HCALL 0x450 390 for (unsigned hcall = 4; hcall < MAX_HCALL; hcall += 4) 391 kvm_vm_enable_cap(vmfd, KVM_CAP_PPC_ENABLE_HCALL, hcall, 1); 392 393 for (unsigned hcall = 0xf000; hcall < 0xf810; hcall += 4) 394 kvm_vm_enable_cap(vmfd, KVM_CAP_PPC_ENABLE_HCALL, hcall, 1); 395 396 for (unsigned hcall = 0xef00; hcall < 0xef20; hcall += 4) 397 kvm_vm_enable_cap(vmfd, KVM_CAP_PPC_ENABLE_HCALL, hcall, 1); 398 399 // Only a few of many RTAS calls are actually in the KVM and the rest 400 // are handled in QEMU, enable the KVM handling for those 4 here. 401 kvmppc_define_rtas_kernel_token(vmfd, 1, "ibm,set-xive"); 402 kvmppc_define_rtas_kernel_token(vmfd, 2, "ibm,get-xive"); 403 kvmppc_define_rtas_kernel_token(vmfd, 3, "ibm,int-on"); 404 kvmppc_define_rtas_kernel_token(vmfd, 4, "ibm,int-off"); 405 406 dump_text(host_mem, regs.pc, 8, debug_inst_opcode); 407 dump_text(host_mem, BOOK3S_INTERRUPT_DECREMENTER, 16, debug_inst_opcode); 408 409 uint64 decr = 0x7fffffff; 410 if (kvmppc_set_one_reg(cpufd, KVM_REG_PPC_DEC_EXPIRY, &decr)) 411 return -1; 412 413 return 0; 414 } 415 416 #endif // EXECUTOR_COMMON_KVM_PPC64_H