#! /usr/bin/env python3

# Copyright 2020 syzkaller project authors. All rights reserved.
# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

"""Generate the syzkaller ifuzz PowerPC instruction table from an ISA PDF.

Usage: powerisa30_to_syz ISA.pdf [pageoffset opcodes_first_page opcodes_last_page]

The PDF is converted to text page by page with pdftotext.  The "sorted by
opcode" appendix is parsed to enumerate instructions; for every instruction
whose encoding is not fully fixed, its description page is parsed to recover
the bit-field layout.  Go source is printed on stdout, diagnostics go to
stderr and the split text of the description pages goes to the file "outp".
"""

import re
import sys
import pprint
import subprocess


def add_stat(m, s):
    """Increment the counter stored under key `s` in dict `m`."""
    m[s] = m.get(s, 0) + 1


def add_fmt_ins(m, fmt, ins):
    """Append `ins` to the list stored under key `fmt` in dict `m`."""
    m.setdefault(fmt, []).append(ins)


# Cache of already converted pages, keyed by (page number string, store flag).
pagecache = {}

pdf2txt = "/home/aik/xpdf-4.03/build/xpdf/pdftotext"

# Values below are filled in by main() before any page is read.
isa_pdf = None
f = None  # debug dump file ("outp")
pageoffset = None
opcodes_first_page = None
opcodes_last_page = None

# Per-mnemonic adjustment added to the page number taken from the opcode table.
isa_pagenum_correction = {
    'vadduqm': 3,
    'rldicl[.]': -1,
    'dcbtst': -1,
    'dcbf': -1,
    'dcbz': -1,
    'fmsubs[.]': -1,
    'fmsub[.]': -1,
    'rldicr[.]': -2,
    'rldcl[.]': 1,
    'rldcr[.]': 2
}


def myint(s):
    """Return int(s), or None when `s` cannot be converted."""
    try:
        return int(s)
    except (TypeError, ValueError):
        return None


def _find_column_offset(lines):
    """Guess the character offset that separates the two text columns.

    The guess is based on the position of the "0 ... 31" bit ruler lines:
    first a ruler at the very start of a line, then a ruler found past
    column 30.  Returns 0 when no ruler was found.
    """
    off = 0
    for idx, line in enumerate(lines):
        # re.match() anchors at position 0, so a hit is always a ruler that
        # starts the line; the column boundary is assumed right after it.
        ruler = re.match(r"\b0\b\s+[\d\s]+?\b31\b", line)
        if ruler:
            off = ruler.end() + 2
            adjoff = re.match(r'(\s+\/)', lines[idx - 1][off - 2:off + 10])
            if adjoff:
                off += len(adjoff.group(1))
            break
    for idx, line in enumerate(lines):
        ruler = re.search(r"\b0\b\s+[\d\s]+?\b31\b", line[30:])
        if ruler:
            off = max(off, ruler.span()[0] + 30)
            adjoff = re.match(r'(\s+\/)', lines[idx - 1][off:off + 10])
            if adjoff:
                off += len(adjoff.group(1))
            break

    for line in lines:
        if re.match(r'\(RO=[01]\)', line[off - 4:off + 2]):
            print("!!! fixup offset", file = sys.stderr)
            off += 2
    return off


def _split_columns(pagenum, lines):
    """Return the page text with the left column followed by the right one.

    The first and the last line of the page are dropped, whitespace runs are
    collapsed and "[Phased-Out]" markers are skipped.  Exits the script when
    the column boundary cannot be determined.
    """
    off = _find_column_offset(lines)
    if not off:
        # Must not go to stdout: stdout carries the generated Go source.
        print("!!! No idea how to split {}".format(pagenum), file = sys.stderr)
        sys.exit(-1)

    if f is not None:
        f.write("===== page {} off={}\n".format(pagenum, off))
    print("===== page {} off={}".format(pagenum, off), file = sys.stderr)
    for pp in lines:
        if off >= len(pp):
            print("{:{}}$".format(pp, max(off, 128)), file = sys.stderr)
        else:
            print("{:{}}||{}$".format(pp[:off], max(off, 128), pp[off:]), file = sys.stderr)

    result = []
    for column in (slice(None, off), slice(off, None)):
        for c in lines[1:-1]:
            t = c[column].strip()
            if not t:
                continue
            t = re.sub(r'\s+', ' ', t)
            if t == "[Phased-Out]":
                continue
            result.append(t)
            if f is not None:
                f.write(t + "\n")
    return result


def read_pdf_page(pnum, store = False):
    """Return the non-empty text lines of physical PDF page `pnum`.

    With `store` set the page is treated as a two-column instruction
    description page: it is split into columns, normalized and dumped to the
    debug file (see _split_columns).  Results are cached per (page, store).
    """
    pagenum = str(pnum)
    # `store` changes the result, so it has to be a part of the cache key.
    cache_key = (pagenum, bool(store))
    if cache_key in pagecache:
        return pagecache[cache_key]

    raw = subprocess.check_output([pdf2txt, '-enc', "Latin1", "-f", pagenum, "-l", pagenum, "-nopgbrk", "-nodiag",
        "-table",
        isa_pdf, "-"])
    ret = [t for t in raw.decode("Latin1").split("\n") if t != ""]

    if store:
        ret = _split_columns(pagenum, ret)

    pagecache[cache_key] = ret
    return ret


def find_pdf_pagenumber_offset():
    """Scan physical pages 15..99 for the page numbering and the opcode appendix.

    Returns a tuple (pageoffset, opcodes_first_page, opcodes_last_page):
    `pageoffset` is the physical page preceding the first page whose last
    line carries a page number below 100; the other two values are taken from
    the table of contents entries "Set Sorted by Opcode" and
    "Set Sorted by Version" (the latter minus one) and stay None when those
    entries were not seen.  Exits the script when no page number was found.
    """
    first_page = None
    opcodes_first_page = None
    opcodes_last_page = None
    for i in range(15, 100):
        tmp = read_pdf_page(i)
        if not tmp:
            continue

        print("Reading page {}: '{}'".format(i, tmp[-1]), file = sys.stderr)
        if not opcodes_first_page:
            for t in tmp:
                t2 = re.match(r'.*Set Sorted by Opcode[\s\.]+(\d+)', t)
                if t2:
                    print("Found: {}".format(t2.groups()), file = sys.stderr)
                    opcodes_first_page = int(t2.group(1))
                    continue
                t2 = re.match(r'.*Set Sorted by Version[\s\.]+(\d+)', t)
                if t2:
                    print("Found: {}".format(t2.groups()), file = sys.stderr)
                    opcodes_last_page = int(t2.group(1)) - 1
                    break

        # The page number is either the first or the last token of the footer.
        t2 = re.match(r'^(\d+)\s+.*$', tmp[-1])
        if not t2:
            t2 = re.match(r'.*\s+(\d+)$', tmp[-1])
        if t2 and int(t2.group(1)) < 100:
            first_page = i - 1
            break

    if first_page is None:
        print("!!!Error: could not find the first numbered page", file = sys.stderr)
        sys.exit(-1)
    return first_page, opcodes_first_page, opcodes_last_page


def add_mode_priv(mode, priv):
    """Return ", Priv: true" when `priv` contains "P" or "H", else "".

    `mode` is accepted but not used.
    """
    if "P" in priv or "H" in priv:
        return ", Priv: true"
    return ""


def ppcmask(val, s, l):
    """Place the low `l` bits of `val` into a 32-bit word at PowerPC bit `s`.

    Bits are numbered big-endian style: bit 0 is the most significant one.
    """
    return (val & ((1 << l) - 1)) << (31 - (s + l - 1))


def do_sorted_opcodes(fmt_map, ins_stat):
    """Parse the opcode appendix and print one Go table entry per instruction.

    Updates `fmt_map` (format -> mnemonics) and `ins_stat` (counters) and
    returns the number of entries printed.  Reads the module level
    `pageoffset`, `opcodes_first_page` and `opcodes_last_page`.
    """
    ins_out = []
    priv_off = None
    mode_off = None

    for opcode_page in range(opcodes_first_page, opcodes_last_page):
        table = read_pdf_page(opcode_page + pageoffset)[1:-1] # skip header and page number

        for line in table:
            if re.match(r'^\s+Instruction1\s+Format.*', line):
                # Column positions of the table header locate the fields below.
                priv_off = line.find("Privilege3")
                mode_off = line.find("Mode Dep4")

            row = re.match(r'([\.01]{6})\s+([/\.01]{5})\s+([/\.01]{5})\s+([/\.01]{5})\s+([/\.01]{5})\s+([/\.01]{6})\s+(B|X|XO|D|VX|VC|VA|SC|I|XL|DX|M|D|MD|MDS|XFX|XX1|XS|DQ|DS|Z22|Z23|A|XX3|XX4|XX2|XFL)\s+(\S+)\s+(\d+)\s+(\S+)\s+(PPC|P1|P2|v2.00|v2.01|v2.02|v2.03|v2.04|v2.05|v2.06|v2.07|v3.0|v3.0B)(.*)', line)

            if not row:
                continue
            if priv_off is None or mode_off is None:
                print("!!!Error: opcode table row before the table header on page {}".format(opcode_page + pageoffset), file = sys.stderr)
                sys.exit(-1)

            ins0_5 = row.group(1)
            ins6_10 = row.group(2)
            ins11_15 = row.group(3)
            ins16_20 = row.group(4)
            ins21_25 = row.group(5)
            ins26_31 = row.group(6)
            print("{}: {} {} {} {} {} {}".format(row.group(10), ins0_5, ins6_10, ins11_15, ins16_20, ins21_25, ins26_31), file = sys.stderr)
            opcode_str = ins0_5 + ins6_10 + ins11_15 + ins16_20 + ins21_25 + ins26_31
            # '0', '1' and '/' are fixed bits (mask 1), '.' is an operand bit.
            opcodemask_ = int(re.sub(r'[\.]', '0', re.sub(r'[01/]', '1', opcode_str)), 2)
            opcode_ = int(re.sub(r'[\./]', '0', opcode_str), 2)
            print("{:x} {:x}".format(opcode_, opcodemask_), file = sys.stderr)

            fmt = row.group(7)
            pagenum = int(row.group(9))
            mnem = row.group(10).replace("_", "")
            if mnem == "paste[.]":
                mnem = "paste."

            priv = line[priv_off:priv_off + 4].strip()
            mode = line[mode_off:mode_off + 4].strip()

            add_stat(ins_stat, "Fmt " + fmt)
            add_stat(ins_stat, "Priv " + priv)
            add_stat(ins_stat, "Mod " + mode)
            add_fmt_ins(fmt_map, "Fmt " + fmt, mnem)

            if re.match(r'^[01]+$', opcode_str):
                # Every bit is fixed: no fields, no need for the description.
                outp = 'Name: "{}", Opcode: 0x{:08x}, Mask: 0xFFFFFFFF'.format(mnem, opcode_)
                outp += add_mode_priv(mode, priv)
                ins_out += ["\t{" + outp + "},"]
                continue

            if mnem in isa_pagenum_correction:
                pagenum += isa_pagenum_correction[mnem]

            page = read_pdf_page(pagenum + pageoffset, True)

            #
            # We are looking for a pattern like this:
            #
            # Trap Word Immediate D-form
            # twi TO,RA,SI
            # 3 TO RA SI
            # 0 6 11 16 31
            mnem_short = re.escape(mnem.split('[', 1)[0])
            for n in range(len(page) - 1):
                if mnem == page[n] or re.match(r"{} (FRT|FRS|FLM|FXM|CT|IH|XT|XS|BO|VR|S|WC|L|T|R|A|RA|RT|BF|BT|RB|target_addr).*".format(mnem_short), page[n]):
                    break
            else:
                print("!!!Error: could not find {} on page {}".format(mnem, pagenum + pageoffset), file = sys.stderr)
                sys.exit(-1)

            # Found the instruction(s), see now how many variants
            for n1 in range(n, len(page)):
                if re.match(r"\b0\b\s+[\d\s]+\b31\b", page[n1]):
                    break
            else:
                print("!!!Error: could not find format for {} on page {}".format(mnem, pagenum + pageoffset), file = sys.stderr)
                sys.exit(-1)

            # The line above the bit ruler names the fields; some names get
            # glued together by the text extraction and are split back here.
            namesraw = re.sub(r'(/ )+', '/ ', page[n1 - 1]).upper()
            namesraw = namesraw.replace('AXBXTX', "AX BX TX") # xxperm xxpermr xscmpeqdp xscmpgtdp xscmpgedp
            namesraw = namesraw.replace('CXAXBXTX', "CX AX BX TX") # xxsel
            namesraw = namesraw.replace('DMBXTX', "DM BX TX") # xvtstdcsp xvtstdcdp
            namesraw = namesraw.replace('AXBX', "AX BX") # xscmpexpdp
            namesraw = namesraw.replace('BXTX', "BX TX") # xxinsertw
            namesraw = namesraw.replace('CXAX', "CX AX") # xxsel
            names = namesraw.split()
            numbers = [int(num) for num in page[n1].split()]

            comment = ""
            if "addpcis" == page[n][:7]:
                # this one is just badly formatted
                names = ["19", "RT", "D1", "D0", "2", "D2"]
                numbers = [0, 6, 11, 16, 26, 31]
            if "darn" == page[n][:4]:
                # this one is just badly formatted
                names = ["31", "RT", "///", "L", "///", "755", "/"]
                numbers = [0, 6, 11, 14, 16, 21, 31]
            # NOTE(review): "copy " is 5 characters long but is compared with
            # a 6 character slice, so only the "paste. " test can ever be
            # true.  Left as is: changing it would alter the generated table;
            # verify against the PDF before touching.
            if "copy " == page[n][:6] or "paste. " == page[n][:7]:
                numbers = numbers[0:2] + numbers[3:]
            elif len(numbers) > 5 and "RC" == names[4].upper() and numbers[4] == 21 and numbers[5] == 31:
                # vcmpneb & co have missing bitfield offset right before the last one
                comment += "fixup"
                numbers = numbers[:5] + [22] + numbers[5:]

            # Lines between the mnemonic and the field names are the variants.
            for variant in range(n, n1 - 1):
                predef = {}
                com2 = comment  # collected for debugging, currently not emitted
                ext = re.match(r'^(\S+)\s+\S+\s+\(([^\)]+)\)$', page[variant])
                outp = '\t{'
                if ext:
                    extbits = ext.group(2).strip()
                    # we have "(OE=1 Rc=0)" or "(if Rc=0)"
                    ee = re.sub(r'(\(|\)|if )', '', extbits, flags = re.I).split()
                    for e in ee:
                        e2 = e.split("=")
                        predef[e2[0].upper()] = int(e2[1])
                    mnem_out = ext.group(1)
                else:
                    mnem_out = page[variant].split()[0]
                outp += 'Name: "{}", '.format(mnem_out)

                check_bitmap = 0

                opcode = 0
                opcodemask = 0
                bits = {}
                print("Numbers {} names {} predef {}".format(numbers, names, predef), file = sys.stderr)
                for j in range(len(names)):
                    start = numbers[j]
                    if j + 1 < len(numbers): # bc BO,BI,target_addr (AA=0 LK=0)
                        nn = numbers[j + 1] - numbers[j]
                        if j == len(names) - 1 and numbers[j + 1] == 31 and names[j] != "/":
                            nn += 1
                    else:
                        if numbers[j] != 31:
                            nn = 32 - numbers[j]
                        else:
                            nn = 1

                    for n2 in range(start, start + nn):
                        bit = 1 << (31 - n2)
                        if check_bitmap & bit:
                            print("!!!Error: bit {} overlap in {}".format(n2, mnem), file = sys.stderr)
                        check_bitmap |= bit

                    field = names[j].upper()
                    if field in predef:
                        if nn != 1:
                            print("!!!Error: bitfield {} is expected to be a single bit in {}".format(names[j], mnem), file = sys.stderr)
                            sys.exit(-10)
                        opcodemask |= ppcmask(1, start, nn)
                        opcode |= ppcmask(predef[field], start, nn)
                        com2 += " {}:{}({})".format(predef[field], start, names[j])
                        continue
                    if re.match(r'\d+', names[j]):
                        opcodemask |= ppcmask(0xffffffff, start, nn)
                        opcode |= ppcmask(int(names[j]), start, nn)
                        com2 += ' {}:{}..{}'.format(names[j], start, start + nn - 1)
                        continue
                    if re.match(r'/+', names[j]):
                        opcodemask |= ppcmask(0xffffffff, start, nn)
                        com2 += ' 0:{}..{}(///)'.format(start, start + nn - 1)
                        continue
                    # MD-form needs special handling
                    if field in ["ME", "MB"] and nn == 6:
                        bits[field] = [(start, nn - 1), (start + nn - 1, 1)]
                    elif field in ["SH"]:
                        if field not in bits:
                            bits[field] = []
                        bits[field] += [(start, nn)]
                    elif field in bits:
                        print("!!!Error: unsupported layout in {}".format(mnem), file = sys.stderr)
                        sys.exit(-1)
                    else:
                        bits[field] = [(start, nn)]

                if check_bitmap != 0xFFFFFFFF:
                    com2 += " 0x{:08X}".format(check_bitmap)

                opcodemask |= opcodemask_
                if (opcode | opcodemask) != opcodemask:
                    diffmask = (opcode | opcodemask) ^ opcodemask
                    print("!!!Warning: check opcode calculation {}/{}: {:x} vs {:x} diff {:b} {}".format(
                        mnem, mnem_out, opcode, opcodemask, diffmask, names),
                        file = sys.stderr)
                    opcodemask |= diffmask

                outp += "Opcode: 0x{:08x}, Mask: 0x{:08x}".format(opcode, opcodemask)
                outp += add_mode_priv(mode, priv)
                outp += ", Fields: []powerpc.InsnField{"
                if len(bits) > 0:
                    for k, bb in sorted(bits.items()):
                        outp += '{{Name: "{}", Bits: []powerpc.InsnBits{{'.format(k)
                        outptmp = ""
                        for v in bb:
                            outptmp += '{{{}, {}}}, '.format(v[0], v[1])
                        outp += outptmp[:-2] + '}}, '
                    outp = outp[:-2]
                outp += "}"
                outp += "},"
                ins_out += [outp]

    for outp in sorted(ins_out):
        print(outp)

    return len(ins_out)


def main():
    """Parse the command line, generate the table and print the statistics."""
    global isa_pdf, f, pageoffset, opcodes_first_page, opcodes_last_page

    if len(sys.argv) < 2:
        print("Usage: {} ISA.pdf [pageoffset opcodes_first_page opcodes_last_page]".format(sys.argv[0]), file = sys.stderr)
        sys.exit(-1)
    isa_pdf = sys.argv[1]

    with open("outp", 'w+') as f:
        if len(sys.argv) > 4:
            pageoffset, opcodes_first_page, opcodes_last_page = int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4])
        else:
            pageoffset, opcodes_first_page, opcodes_last_page = find_pdf_pagenumber_offset()
        #pageoffset, opcodes_first_page, opcodes_last_page = 18, 1187, 1188
        print("Offset = {}, opcodes on pages {}..{} (phys pages {}..{})".format(pageoffset, opcodes_first_page, opcodes_last_page, opcodes_first_page + pageoffset, opcodes_last_page + pageoffset), file = sys.stderr)

        ins_stat = {}
        fmt_map = {}

        print('// Code generated by {}. DO NOT EDIT.'.format(sys.argv[0]))
        print('')
        print('//go:build !codeanalysis')
        print('// +build !codeanalysis')
        print('')
        print('package generated')
        print('')
        print('import "github.com/google/syzkaller/pkg/ifuzz/powerpc"')
        print('')
        print('func init() {')
        print('\tpowerpc.Register(insns)')
        print('}')
        print('')
        print('var insns = []*powerpc.Insn{')

        ins_num = do_sorted_opcodes(fmt_map, ins_stat)

        print("}")

        print("Total {} instructions".format(ins_num), file = sys.stderr)
        ins_num = 0
        for k, v in sorted(ins_stat.items()):
            print("\t{}: {} / {}".format(k if k else "-", v, len(fmt_map[k]) if k in fmt_map else "-"), file = sys.stderr)
            ins_num += v
        # Every table row adds three counters (Fmt, Priv, Mod).
        print("Sorted {} instructions".format(ins_num // 3), file = sys.stderr)


if __name__ == "__main__":
    main()