github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ifuzz/powerpc/gen/powerisa30_to_syz (about)

     1  #! /usr/bin/env python3
     2  
     3  # Copyright 2020 syzkaller project authors. All rights reserved.
     4  # Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     5  
     6  import re
     7  import sys
     8  import pprint
     9  import subprocess
    10  
    11  
    12  def add_stat(m, s):
    13  	if s in m:
    14  		m[s] += 1
    15  	else:
    16  		m[s] = 1
    17  
    18  def add_fmt_ins(m, fmt, ins):
    19  	if fmt in m:
    20  		m[fmt] += [ins]
    21  	else:
    22  		m[fmt] = [ins]
    23  
# Cache of already extracted PDF pages; filled and consulted by read_pdf_page().
pagecache = {}

# Path of the pdftotext executable that read_pdf_page() runs.
# NOTE(review): this is an absolute path into one user's home directory, so it
# presumably has to be edited before the script runs elsewhere -- confirm.
pdf2txt = "/home/aik/xpdf-4.03/build/xpdf/pdftotext"
# First command line argument: the PDF file handed to pdftotext.
isa_pdf = sys.argv[1]
# Per-mnemonic corrections that do_sorted_opcodes() adds to the page number
# taken from the opcode table before reading the instruction description page.
isa_pagenum_correction = {
		'vadduqm': 3,
		'rldicl[.]': -1,
		'dcbtst': -1,
		'dcbf': -1,
		'dcbz': -1,
		'fmsubs[.]': -1,
		'fmsub[.]': -1,
		'rldicr[.]': -2,
		'rldcl[.]': 1,
		'rldcr[.]': 2
	}
    40  
    41  def myint(s):
    42  	try:
    43  		return int(s)
    44  	except:
    45  		return None
    46  
# Debug dump file written by read_pdf_page() for pages read with store=True
# (page banner plus the extracted column lines). Opened relative to the current
# working directory; this statement keeps it open as a module-level global.
f = open("outp", 'w+')
    48  
    49  def read_pdf_page(pnum, store = False):
    50  	global pagecache
    51  
    52  	pagenum = str(pnum)
    53  	if pagenum in pagecache:
    54  		return pagecache[pagenum]
    55  
    56  	tmp = subprocess.check_output([pdf2txt, '-enc', "Latin1", "-f", pagenum, "-l", pagenum, "-nopgbrk", "-nodiag",
    57  		"-table",
    58  		isa_pdf, "-"])
    59  	tmp = tmp.decode("Latin1").split("\n")
    60  	ret = []
    61  
    62  	sps = []
    63  	for t in tmp:
    64  		if t == "":
    65  			continue
    66  		ret += [t]
    67  
    68  	if store:
    69  		off = 0
    70  		for t in range(len(ret)):
    71  			t2 = re.match(r"\b0\b\s+[\d\s]+?\b31\b", ret[t])
    72  			if t2:
    73  				if t2.start() == 0:
    74  					off = t2.end() + 2
    75  					adjoff = re.match(r'(\s+\/)', ret[t-1][off-2:off + 10])
    76  					if adjoff:
    77  						off += len(adjoff.group(1))
    78  					break
    79  				off = t2.start()
    80  
    81  				adjoff = re.match(r'(\s+\/)', ret[t-1][off:off + 10])
    82  				if adjoff:
    83  					off += len(adjoff.group(1))
    84  				break
    85  		for t in range(len(ret)):
    86  			t2 = re.search(r"\b0\b\s+[\d\s]+?\b31\b", ret[t][30:])
    87  			if t2:
    88  				off = max(off, t2.span()[0] + 30)
    89  				adjoff = re.match(r'(\s+\/)', ret[t-1][off:off + 10])
    90  				if adjoff:
    91  					off += len(adjoff.group(1))
    92  				break
    93  
    94  		for pp in ret:
    95  			if re.match(r'\(RO=[01]\)', pp[off-4:off+2]):
    96  				print("!!! fixup offset", file = sys.stderr)
    97  				off += 2
    98  
    99  		if not off:
   100  			print("!!! No idea how to split {}".format(pagenum))
   101  			sys.exit(-1)
   102  
   103  		f.write("===== page {} off={}\n".format(pagenum, off))
   104  		print("===== page {} off={}".format(pagenum, off), file = sys.stderr)
   105  		for pp in ret:
   106  			if off >= len(pp):
   107  				print("{:{}}$".format(pp, max(off, 128)), file = sys.stderr)
   108  			else:
   109  				print("{:{}}||{}$".format(pp[:off], max(off, 128), pp[off:]), file = sys.stderr)
   110  
   111  		ret2 = []
   112  		for c in ret[1:-1]:
   113  			t = c[:off].strip()
   114  			if not t:
   115  				continue
   116  			t = re.sub(r'\s+', ' ', t)
   117  			if t == "[Phased-Out]":
   118  				continue
   119  			ret2 += [t]
   120  			f.write(t + "\n")
   121  		for c in ret[1:-1]:
   122  			t = c[off:].strip()
   123  			if not t:
   124  				continue
   125  			t = re.sub(r'\s+', ' ', t)
   126  			if t == "[Phased-Out]":
   127  				continue
   128  			ret2 += [t]
   129  			f.write(t + "\n")
   130  		ret = ret2
   131  
   132  	pagecache[pagenum] = ret
   133  	return ret
   134  
   135  def find_pdf_pagenumber_offset():
   136  	opcodes_first_page = None
   137  	opcodes_last_page = None
   138  	for i in range(15, 100):
   139  		tmp = read_pdf_page(i)
   140  		if not tmp:
   141  			continue
   142  
   143  		print("Reading page {}: '{}'".format(i, tmp[-1]), file = sys.stderr)
   144  		if not opcodes_first_page:
   145  			for t in tmp:
   146  				t2 = re.match(r'.*Set Sorted by Opcode[\s\.]+(\d+)', t)
   147  				if t2:
   148  					print("Found: {}".format(t2.groups()), file = sys.stderr)
   149  					opcodes_first_page = int(t2.group(1))
   150  					continue
   151  				t2 = re.match(r'.*Set Sorted by Version[\s\.]+(\d+)', t)
   152  				if t2:
   153  					print("Found: {}".format(t2.groups()), file = sys.stderr)
   154  					opcodes_last_page = int(t2.group(1)) - 1
   155  					break
   156  
   157  		t2  = re.match(r'^(\d+)\s+.*$', tmp[-1])
   158  		if not t2:
   159  			t2  = re.match(r'.*\s+(\d+)$', tmp[-1])
   160  		if t2:
   161  			first_page = int(t2.group(1))
   162  			if first_page < 100:
   163  				first_page = i - 1
   164  				break
   165  	return first_page, opcodes_first_page, opcodes_last_page
   166  
   167  if len(sys.argv) > 4:
   168  	pageoffset, opcodes_first_page, opcodes_last_page = int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4])
   169  else:
   170  	pageoffset, opcodes_first_page, opcodes_last_page = find_pdf_pagenumber_offset()
   171  #pageoffset, opcodes_first_page, opcodes_last_page = 18, 1187, 1188
   172  print("Offset = {}, opcodes on pages {}..{} (phys pages {}..{})".format(pageoffset, opcodes_first_page, opcodes_last_page, opcodes_first_page + pageoffset, opcodes_last_page + pageoffset), file = sys.stderr)
   173  
   174  def add_mode_priv(mode, priv):
   175  	ret = ""
   176  	if "P" in priv or "H" in priv:
   177  		ret += ", Priv: true"
   178  	return ret
   179  
   180  def ppcmask(val, s, l):
   181  	return (val & ((1 << l) - 1)) << (31 - (s + l - 1))
   182  
def do_sorted_opcodes(fmt_map, ins_stat):
	"""Parse the opcode table pages and print one Go instruction initializer line per instruction variant.

	Every table row that matches the row pattern is handled as follows:
	  * rows whose 32 opcode bits are all 0/1 are emitted directly with
	    Mask 0xFFFFFFFF and no fields;
	  * for all other rows the instruction description page is read (in
	    column-split mode) to recover the field names and bit offsets, and one
	    line is emitted per mnemonic variant listed there.

	fmt_map  -- dict updated through add_fmt_ins(): "Fmt <form>" -> mnemonics.
	ins_stat -- dict updated through add_stat(): counters for the "Fmt ...",
	            "Priv ..." and "Mod ..." keys (three increments per row).

	The collected lines are printed to stdout in sorted order; diagnostics go
	to stderr. Returns the number of lines printed. Exits the process when an
	instruction or its bit layout cannot be located on its page.
	"""
	ins_out = []

	# NOTE(review): range() excludes opcodes_last_page itself -- confirm that the
	# last opcode page is meant to be skipped.
	for i in range(opcodes_first_page, opcodes_last_page):
		tmp = read_pdf_page(i + pageoffset)[1:-1] # skip header and page number

		# `tmp` is rebound inside the loop; the iteration itself continues over
		# the list object obtained above.
		for line in tmp:
			#print(line)
			# Table header row: remember the columns at which the privilege and
			# mode flags start. priv_off/mode_off are only bound once such a
			# header has been seen.
			tmp = re.match(r'^\s+Instruction1\s+Format.*', line)
			if tmp:
				priv_off = line.find("Privilege3")
				mode_off = line.find("Mode Dep4")

			# Table row: six opcode bit groups (6+5+5+5+5+6 characters of
			# 0/1/./ with / not allowed in the first group), form, book, page
			# number, mnemonic, ISA version, then the rest of the line.
			tmp = re.match(r'([\.01]{6})\s+([/\.01]{5})\s+([/\.01]{5})\s+([/\.01]{5})\s+([/\.01]{5})\s+([/\.01]{6})\s+(B|X|XO|D|VX|VC|VA|SC|I|XL|DX|M|D|MD|MDS|XFX|XX1|XS|DQ|DS|Z22|Z23|A|XX3|XX4|XX2|XFL)\s+(\S+)\s+(\d+)\s+(\S+)\s+(PPC|P1|P2|v2.00|v2.01|v2.02|v2.03|v2.04|v2.05|v2.06|v2.07|v3.0|v3.0B)(.*)', line)

			if not tmp:
				continue
			ins0_5 = tmp.group(1)
			ins6_10 = tmp.group(2)
			ins11_15 = tmp.group(3)
			ins16_20 = tmp.group(4)
			ins21_25 = tmp.group(5)
			ins26_31 = tmp.group(6)
			print("{}: {} {} {} {} {} {}".format(tmp.group(10), ins0_5, ins6_10, ins11_15, ins16_20, ins21_25, ins26_31), file = sys.stderr)
			opcode_str = ins0_5 + ins6_10 + ins11_15 + ins16_20 + ins21_25 + ins26_31
			# Mask from the table: '0', '1' and '/' positions become 1, '.' positions 0.
			opcodemask_ = int(re.sub(r'[\.]', '0', re.sub(r'[01/]', '1', opcode_str)), 2)
			# Opcode value from the table: '.' and '/' positions count as 0.
			opcode_ = int(re.sub(r'[\./]', '0', opcode_str), 2)
			print("{:x} {:x}".format(opcode_, opcodemask_), file = sys.stderr)

			fmt = tmp.group(7)
			ins_book = tmp.group(8)
			pagenum = int(tmp.group(9))
			mnem = tmp.group(10).replace("_", "")
			if mnem == "paste[.]":
				mnem = "paste."
			ins_isa = tmp.group(11)

			# Flags are cut out of the row at the columns found in the header row.
			priv = line[priv_off:priv_off + 4].strip()
			mode = line[mode_off:mode_off + 4].strip()
			ins_desc = line[mode_off + 4:].strip()

			add_stat(ins_stat, "Fmt " + fmt)
			add_stat(ins_stat, "Priv " + priv)
			add_stat(ins_stat, "Mod " + mode)
			add_fmt_ins(fmt_map, "Fmt " + fmt, mnem)

			# All 32 bits are fixed: emit the instruction straight from the table.
			if re.match(r'^[01]+$', ins0_5 + ins6_10 + ins11_15 + ins16_20 + ins21_25 + ins26_31):
				# The format string has two placeholders; the extra opcodemask_
				# argument is ignored and the mask is the literal 0xFFFFFFFF.
				outp = 'Name: "{}", Opcode: 0x{:08x}, Mask: 0xFFFFFFFF'.format(mnem, opcode_, opcodemask_)
				outp += add_mode_priv(mode, priv)
				ins_out += ["\t{" + outp + "},"]
				continue

			if mnem in isa_pagenum_correction:
				pagenum += isa_pagenum_correction[mnem]

			tmp = read_pdf_page(pagenum + pageoffset, True)

			#
			# We are looking for a pattern like this:
			#
			# Trap Word Immediate                               D-form
			# twi             TO,RA,SI
			#     3          TO          RA               SI
			# 0            6          11       16                          31
			mnem_list = mnem.split('[', 1)
			mnem_short = re.escape(mnem_list[0])
			# `found` is assigned but not read afterwards in this function.
			found = False
			# Locate the line that is the mnemonic itself, or the short mnemonic
			# followed by one of the known first operand names; n keeps its index.
			for n in range(len(tmp) - 1):
				if mnem == tmp[n] or re.match(r"{} (FRT|FRS|FLM|FXM|CT|IH|XT|XS|BO|VR|S|WC|L|T|R|A|RA|RT|BF|BT|RB|target_addr).*".format(mnem_short), tmp[n]):
					break
			else:
				print("!!!Error: could not find {} on page {}".format(mnem, pagenum + pageoffset), file = sys.stderr)
				sys.exit(-1)

			# Found the instruction(s), see now how many variants
			# n1 becomes the index of the "0 ... 31" bit offset line; the line
			# right before it holds the field names, and the lines from n up to
			# that one are the mnemonic variants.
			for n1 in range(n, len(tmp)):
				if re.match(r"\b0\b\s+[\d\s]+\b31\b", tmp[n1]):
					#print(n1)
					break
			else:
				print("!!!Error: could not find format for {} on page {}".format(mnem, pagenum + pageoffset), file = sys.stderr)
				sys.exit(-1)

			# Field names: collapse runs of "/ " and separate names that the
			# text extraction glued together.
			namesraw = re.sub(r'(/ )+', '/ ', tmp[n1 - 1]).upper()
			namesraw = namesraw.replace('AXBXTX', "AX BX TX") # xxperm xxpermr xscmpeqdp xscmpgtdp xscmpgedp
			namesraw = namesraw.replace('CXAXBXTX', "CX AX BX TX") # xxsel
			namesraw = namesraw.replace('DMBXTX', "DM BX TX") # xvtstdcsp xvtstdcdp
			namesraw = namesraw.replace('AXBX', "AX BX") # xscmpexpdp
			namesraw = namesraw.replace('BXTX', "BX TX") # xxinsertw
			namesraw = namesraw.replace('CXAX', "CX AX") # xxsel
			names = namesraw.split()
			numbers = [int(i) for i in tmp[n1].split()]

			comment = ""
			if "addpcis" == tmp[n][:7]:
				# this one is just badly formatted
				names = ["19", "RT", "D1", "D0", "2", "D2"]
				numbers = [0, 6, 11, 16, 26, 31]
			if "darn" == tmp[n][:4]:
				# this one is just badly formatted
				names = ["31", "RT", "///", "L", "///", "755", "/"]
				numbers = [0, 6, 11, 14, 16, 21, 31]
			# NOTE(review): "copy " is 5 characters but is compared with a
			# 6 character slice, so that comparison can only succeed when
			# tmp[n] is exactly "copy " -- confirm whether [:5] was intended.
			if "copy " == tmp[n][:6] or "paste. " == tmp[n][:7]:
				# Drop the third bit offset.
				numbers = numbers[0:2] + numbers[3:]
			elif len(numbers) > 5 and "RC" == names[4].upper() and numbers[4] == 21 and numbers[5] == 31:
				# vcmpneb & co have missing bitfield offset right before the last one
				comment += "fixup"
				numbers = numbers[:5] + [22] + numbers[5:]

			# One output line per mnemonic variant line (lines n .. n1-2).
			for i in range(n, n1 - 1):
				predef = {}
				com2 = comment
				ext = re.match(r'^(\S+)\s+\S+\s+\(([^\)]+)\)$', tmp[i])
				outp = '\t{'
				if ext:
					extbits = ext.group(2).strip()
					# we have "(OE=1 Rc=0)" or "(if Rc=0)"
					ee = re.sub(r'(\(|\)|if )', '', extbits, flags = re.I).split()
					for e in ee:
						e2 = e.split("=")
						predef[e2[0].upper()] = int(e2[1])
					mnem_out = ext.group(1)
				else:
					mnem_out = tmp[i].split()[0]
				outp += 'Name: "{}", '.format(mnem_out)

				# Bitmap of the bits covered by the fields, used to report overlaps.
				check_bitmap = 0

				opcode = 0
				opcodemask = 0
				bits = {}
				print("Numbers {}  names {}  predef {}".format(numbers, names, predef), file = sys.stderr)
				for j in range(len(names)):
					start = numbers[j]
					# nn is the width of field j in bits.
					if j + 1 < len(numbers): # bc BO,BI,target_addr (AA=0 LK=0)
						nn = numbers[j + 1] - numbers[j]
						# The last field ending at offset 31 includes bit 31
						# itself, unless it is the "/" placeholder.
						if j == len(names) - 1 and numbers[j + 1] == 31 and names[j] != "/":
							nn += 1
					else:
						# No offset follows: the field runs to bit 31, or is the
						# single bit 31.
						if numbers[j] != 31:
							nn = 32 - numbers[j]
						else:
							nn = 1

					for n2 in range(start, start + nn):
						bit = 1 << (31 - n2)
						if check_bitmap & bit:
							print("!!!Error: bit {} overlap in {}".format(n2, mnem), file = sys.stderr)
						check_bitmap |= bit

					# Field fixed by the "(X=v)" annotation of this variant: it
					# must be one bit wide and goes into both opcode and mask.
					if names[j].upper() in predef:
						if nn != 1:
							print("!!!Error: bitfield {} is expected to be a single bit in {}".format(names[j], mnem), file = sys.stderr)
							sys.exit(-10)
						opcodemask |= ppcmask(1, start, nn)
						opcode |= ppcmask(predef[names[j].upper()], start, nn)
						com2 += " {}:{}({})".format(predef[names[j].upper()], start, names[j])
						continue
					# Name starting with digits: a constant field value.
					if re.match(r'\d+', names[j]):
						opcodemask |= ppcmask(0xffffffff, start, nn)
						opcode |= ppcmask(int(names[j]), start, nn)
						com2 += ' {}:{}..{}'.format(names[j], start, start + nn - 1)
						continue
					# Name starting with '/': masked, with the opcode bits left at 0.
					if re.match(r'/+', names[j]):
						opcodemask |= ppcmask(0xffffffff, start, nn)
						com2 += ' 0:{}..{}(///)'.format(start, start + nn - 1)
						continue
					# MD-form needs special handling
					# A 6 bit ME/MB is recorded as a 5 bit part plus a separate last bit.
					if names[j].upper() in ["ME", "MB"] and nn == 6:
						bits[names[j].upper()] = [(start, nn - 1), (start + nn - 1, 1)]
					elif names[j].upper() in ["SH"]:
						# SH may occur more than once; its parts are accumulated.
						if names[j].upper() not in bits:
							bits[names[j].upper()] = []
						bits[names[j].upper()] += [(start, nn)]
					elif names[j].upper() in bits:
						print("!!!Error: unsupported layout in {}".format(mnem), file = sys.stderr)
						sys.exit(-1)
					else:
						bits[names[j].upper()] = [(start, nn)]

				if check_bitmap != 0xFFFFFFFF:
					com2 += " 0x{:08X}".format(check_bitmap)

				# Merge in the mask taken from the opcode table; opcode bits that
				# fall outside the combined mask are reported and added to it.
				opcodemask |= opcodemask_
				if (opcode | opcodemask) != opcodemask:
					diffmask = (opcode | opcodemask) ^ opcodemask
					print("!!!Warning: check opcode calculation {}/{}: {:x} vs {:x} diff {:b} {}".format(
						mnem, mnem_out, opcode, opcodemask, diffmask, names),
						file = sys.stderr)
					opcodemask |= diffmask

				outp += "Opcode: 0x{:08x}, Mask: 0x{:08x}".format(opcode, opcodemask)
				outp += add_mode_priv(mode, priv)
				outp += ", Fields: []powerpc.InsnField{"
				if len(bits) > 0:
					for k, bb in sorted(bits.items()):
						outp += '{{Name: "{}", Bits: []powerpc.InsnBits{{'.format(k)
						outptmp = ""
						for v in bb:
							outptmp += '{{{}, {}}}, '.format(v[0], v[1])
						# [:-2] drops the trailing ", " separator.
						outp +=	outptmp[:-2] + '}}, '
					outp = outp[:-2]
				outp += "}"
				outp += "},"
				ins_out += [outp]

	for outp in sorted(ins_out):
		print(outp)

	return len(ins_out)
   393  
   394  ins_stat = {}
   395  fmt_map = {}
   396  
   397  print('// Code generated by {}. DO NOT EDIT.'.format(sys.argv[0]))
   398  print('')
   399  print('//go:build !codeanalysis')
   400  print('// +build !codeanalysis')
   401  print('')
   402  print('package generated')
   403  print('')
   404  print('import "github.com/google/syzkaller/pkg/ifuzz/powerpc"')
   405  print('')
   406  print('func init() {')
   407  print('\tpowerpc.Register(insns)')
   408  print('}')
   409  print('')
   410  print('var insns = []*powerpc.Insn{')
   411  
   412  ins_num = do_sorted_opcodes(fmt_map, ins_stat)
   413  
   414  print("}")
   415  
   416  print("Total {} instructions".format(ins_num), file = sys.stderr)
   417  ins_num = 0
   418  for k, v in sorted(ins_stat.items()):
   419  	print("\t{}: {} / {}".format(k if k else "-", v, len(fmt_map[k]) if k in fmt_map else "-"), file = sys.stderr)
   420  	ins_num += v
   421  print("Sorted {} instructions".format(ins_num/3), file = sys.stderr)