github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/ifuzz/x86/gen/gen.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // gen generates instruction tables (ifuzz_types/insns.go) from Intel XED tables.
     5  // Tables used to generate insns.go are checked in in all-enc-instructions.txt.
     6  package main
     7  
     8  import (
     9  	"bufio"
    10  	"bytes"
    11  	"errors"
    12  	"fmt"
    13  	"os"
    14  	"reflect"
    15  	"strconv"
    16  	"strings"
    17  
    18  	"github.com/google/syzkaller/pkg/ifuzz/iset"
    19  	"github.com/google/syzkaller/pkg/ifuzz/x86"
    20  	"github.com/google/syzkaller/pkg/osutil"
    21  	"github.com/google/syzkaller/pkg/serializer"
    22  	"github.com/google/syzkaller/pkg/tool"
    23  )
    24  
    25  // nolint: gocyclo, gocognit, funlen, dupl
    26  func main() {
    27  	if len(os.Args) != 3 {
    28  		tool.Failf("usage: gen instructions.txt output.file")
    29  	}
    30  	f, err := os.Open(os.Args[1])
    31  	if err != nil {
    32  		tool.Failf("failed to open input file: %v", err)
    33  	}
    34  	defer f.Close()
    35  
    36  	skipped := 0
    37  	saved := ""
    38  	var insns []*x86.Insn
    39  	var insn, insn1 *x86.Insn
    40  	s := bufio.NewScanner(f)
    41  	for i := 1; s.Scan(); i++ {
    42  		reportError := func(msg string, args ...interface{}) {
    43  			fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text())
    44  			tool.Failf(msg, args...)
    45  		}
    46  		line := s.Text()
    47  		if comment := strings.IndexByte(line, '#'); comment != -1 {
    48  			line = line[:comment]
    49  		}
    50  		line = strings.TrimSpace(line)
    51  		if line == "" {
    52  			continue
    53  		}
    54  		if line[len(line)-1] == '\\' {
    55  			saved += line[:len(line)-1]
    56  			continue
    57  		}
    58  		line = saved + line
    59  		saved = ""
    60  		if line == "{" {
    61  			insn = new(x86.Insn)
    62  			continue
    63  		}
    64  		if line == "}" {
    65  			if insn1 != nil {
    66  				insns = append(insns, insn1)
    67  				insn1 = nil
    68  				insn = nil
    69  			}
    70  			continue
    71  		}
    72  		colon := strings.IndexByte(line, ':')
    73  		if colon == -1 {
    74  			reportError("no colon")
    75  		}
    76  		name := strings.TrimSpace(line[:colon])
    77  		if name == "" {
    78  			reportError("empty attribute name")
    79  		}
    80  		var vals []string
    81  		for _, v := range strings.Split(line[colon+1:], " ") {
    82  			v = strings.TrimSpace(v)
    83  			if v == "" {
    84  				continue
    85  			}
    86  			vals = append(vals, v)
    87  		}
    88  		switch name {
    89  		case "ICLASS":
    90  			if len(vals) != 1 {
    91  				reportError("ICLASS has more than one value")
    92  			}
    93  			insn.Name = vals[0]
    94  		case "CPL":
    95  			if len(vals) != 1 {
    96  				reportError("CPL has more than one value")
    97  			}
    98  			if vals[0] != "0" && vals[0] != "3" {
    99  				reportError("unknown CPL value: %v", vals[0])
   100  			}
   101  			insn.Priv = vals[0] == "0"
   102  		case "EXTENSION":
   103  			if len(vals) != 1 {
   104  				reportError("EXTENSION has more than one value")
   105  			}
   106  			insn.Extension = vals[0]
   107  			switch insn.Extension {
   108  			case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER":
   109  				insn.Mode = 1<<iset.ModeLong64 | 1<<iset.ModeProt32
   110  			}
   111  			insn.Avx2Gather = insn.Extension == "AVX2GATHER"
   112  		case "PATTERN":
   113  			if insn1 != nil {
   114  				insns = append(insns, insn1)
   115  			}
   116  			insn1 = new(x86.Insn)
   117  			*insn1 = *insn
   118  			if err := parsePattern(insn1, vals); err != nil {
   119  				var errSkip errSkip
   120  				if !errors.As(err, &errSkip) {
   121  					reportError(errSkip.Error())
   122  				}
   123  				if err.Error() != "" {
   124  					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
   125  				}
   126  				skipped++
   127  				insn1 = nil
   128  			}
   129  		case "OPERANDS":
   130  			if insn1 == nil {
   131  				break
   132  			}
   133  			if err := parseOperands(insn1, vals); err != nil {
   134  				var errSkip errSkip
   135  				if !errors.As(err, &errSkip) {
   136  					reportError(errSkip.Error())
   137  				}
   138  				if err.Error() != "" {
   139  					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
   140  				}
   141  				skipped++
   142  				insn1 = nil
   143  			}
   144  		}
   145  	}
   146  
   147  	var deduped []*x86.Insn
   148  nextInsn:
   149  	for _, insn := range insns {
   150  		if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" {
   151  			skipped++
   152  			continue
   153  		}
   154  		mod0 := insn.Mod
   155  		for j := len(deduped) - 1; j >= 0; j-- {
   156  			insn1 := deduped[j]
   157  			if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 {
   158  				insn.Mod = insn1.Mod
   159  			}
   160  			if reflect.DeepEqual(insn, insn1) {
   161  				if insn.Mod != mod0 {
   162  					insn1.Mod = -1
   163  				}
   164  				continue nextInsn
   165  			}
   166  			insn.Mod = mod0
   167  		}
   168  		deduped = append(deduped, insn)
   169  	}
   170  	fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped))
   171  	insns = deduped
   172  
   173  	out := new(bytes.Buffer)
   174  	fmt.Fprintf(out, `
   175  // Code generated by pkg/ifuzz/x86/gen. DO NOT EDIT.
   176  
   177  //go:build !codeanalysis
   178  
   179  package generated
   180  
   181  import . "github.com/google/syzkaller/pkg/ifuzz/x86"
   182  
   183  func init() {
   184  	Register(insns)
   185  }
   186  
   187  var insns =
   188  `)
   189  	serializer.Write(out, insns)
   190  	if err := osutil.WriteFileAtomically(os.Args[2], out.Bytes()); err != nil {
   191  		tool.Fail(err)
   192  	}
   193  
   194  	fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped)
   195  }
   196  
   197  type errSkip string
   198  
   199  func (err errSkip) Error() string {
   200  	return string(err)
   201  }
   202  
   203  // nolint: gocyclo, gocognit, funlen
   204  func parsePattern(insn *x86.Insn, vals []string) error {
   205  	if insn.Opcode != nil {
   206  		return fmt.Errorf("PATTERN is already parsed for the instruction")
   207  	}
   208  	// As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix.
   209  	if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" ||
   210  		insn.Name == "NOP8" || insn.Name == "NOP9" {
   211  		return errSkip("")
   212  	}
   213  	if insn.Mode == 0 {
   214  		insn.Mode = 1<<iset.ModeLast - 1
   215  	}
   216  	insn.Mod = -100
   217  	insn.Reg = -100
   218  	insn.Rm = -100
   219  	insn.VexP = -1
   220  	for _, v := range vals {
   221  		switch {
   222  		case strings.HasPrefix(v, "0x"):
   223  			op, err := strconv.ParseUint(v, 0, 8)
   224  			if err != nil {
   225  				return fmt.Errorf("failed to parse hex pattern: %v", v)
   226  			}
   227  			if !insn.Modrm {
   228  				insn.Opcode = append(insn.Opcode, byte(op))
   229  			} else {
   230  				insn.Suffix = append(insn.Suffix, byte(op))
   231  			}
   232  		case strings.HasPrefix(v, "0b"):
   233  			if len(v) != 8 || v[6] != '_' {
   234  				return fmt.Errorf("failed to parse bin pattern: %v", v)
   235  			}
   236  			var op byte
   237  			if v[2] == '1' {
   238  				op |= 1 << 7
   239  			}
   240  			if v[3] == '1' {
   241  				op |= 1 << 6
   242  			}
   243  			if v[4] == '1' {
   244  				op |= 1 << 5
   245  			}
   246  			if v[5] == '1' {
   247  				op |= 1 << 4
   248  			}
   249  			if v[7] == '1' {
   250  				op |= 1 << 3
   251  			}
   252  			insn.Opcode = append(insn.Opcode, op)
   253  		case strings.HasPrefix(v, "MOD["):
   254  			insn.Modrm = true
   255  			vv, err := parseModrm(v[3:])
   256  			if err != nil {
   257  				return fmt.Errorf("failed to parse %v: %w", v, err)
   258  			}
   259  			insn.Mod = vv
   260  		case strings.HasPrefix(v, "REG["):
   261  			insn.Modrm = true
   262  			vv, err := parseModrm(v[3:])
   263  			if err != nil {
   264  				return fmt.Errorf("failed to parse %v: %w", v, err)
   265  			}
   266  			insn.Reg = vv
   267  		case strings.HasPrefix(v, "RM["):
   268  			insn.Modrm = true
   269  			vv, err := parseModrm(v[2:])
   270  			if err != nil {
   271  				return fmt.Errorf("failed to parse %v: %w", v, err)
   272  			}
   273  			insn.Rm = vv
   274  		case v == "RM=4":
   275  			insn.Rm = 4
   276  		case strings.HasPrefix(v, "SRM["):
   277  			vv, err := parseModrm(v[3:])
   278  			if err != nil {
   279  				return fmt.Errorf("failed to parse %v: %w", v, err)
   280  			}
   281  			insn.Rm = vv
   282  			insn.Srm = true
   283  		case v == "SRM=0", v == "SRM!=0":
   284  		case v == "MOD!=3":
   285  			if !insn.Modrm || insn.Mod != -1 {
   286  				return fmt.Errorf("MOD!=3 without MOD")
   287  			}
   288  			insn.Mod = -3
   289  		case v == "MOD=3":
   290  			// Most other instructions contain "MOD[0b11] MOD=3",
   291  			// but BNDCL contains "MOD[mm] MOD=3"
   292  			insn.Mod = 3
   293  		case v == "MOD=0":
   294  			insn.Mod = 0
   295  		case v == "MOD=1":
   296  			insn.Mod = 1
   297  		case v == "MOD=2":
   298  			insn.Mod = 2
   299  		case v == "lock_prefix":
   300  			insn.Prefix = append(insn.Prefix, 0xF0)
   301  
   302  		// Immediates.
   303  		case v == "UIMM8()", v == "SIMM8()":
   304  			addImm(insn, 1)
   305  		case v == "UIMM16()":
   306  			addImm(insn, 2)
   307  		case v == "UIMM32()":
   308  			addImm(insn, 4)
   309  		case v == "SIMMz()":
   310  			addImm(insn, -1)
   311  		case v == "UIMMv()":
   312  			addImm(insn, -3)
   313  		case v == "UIMM8_1()":
   314  			addImm(insn, 1)
   315  		case v == "BRDISP8()":
   316  			addImm(insn, 1)
   317  		case v == "BRDISP32()":
   318  			addImm(insn, 4)
   319  		case v == "BRDISPz()":
   320  			addImm(insn, -1)
   321  		case v == "MEMDISPv()":
   322  			addImm(insn, -2)
   323  
   324  		// VOP/VEX
   325  		case v == "XOPV":
   326  			insn.Vex = 0x8f
   327  			insn.Mode &^= 1 << iset.ModeReal16
   328  		case v == "EVV":
   329  			insn.Vex = 0xc4
   330  		case v == "VV1":
   331  			insn.Vex = 0xc4
   332  		case v == "VMAP0":
   333  			insn.VexMap = 0
   334  		case v == "V0F":
   335  			insn.VexMap = 1
   336  		case v == "V0F38":
   337  			insn.VexMap = 2
   338  		case v == "V0F3A":
   339  			insn.VexMap = 3
   340  		case v == "XMAP8":
   341  			insn.VexMap = 8
   342  		case v == "XMAP9":
   343  			insn.VexMap = 9
   344  		case v == "XMAPA":
   345  			insn.VexMap = 10
   346  		case v == "VNP":
   347  			insn.VexP = 0
   348  		case v == "V66":
   349  			insn.VexP = 1
   350  		case v == "VF2":
   351  			insn.VexP = 3
   352  		case v == "VF3":
   353  			insn.VexP = 2
   354  		case v == "VL128", v == "VL=0":
   355  			insn.VexL = -1
   356  		case v == "VL256", v == "VL=1":
   357  			insn.VexL = 1
   358  		case v == "NOVSR":
   359  			insn.VexNoR = true
   360  		case v == "NOEVSR":
   361  			insn.VexNoR = true
   362  			// VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0
   363  		case v == "SE_IMM8()":
   364  			addImm(insn, 1)
   365  
   366  		// Modes.
   367  		case v == "mode64":
   368  			insn.Mode &= 1 << iset.ModeLong64
   369  		case v == "not64":
   370  			insn.Mode &^= 1 << iset.ModeLong64
   371  		case v == "mode32":
   372  			insn.Mode &= 1 << iset.ModeProt32
   373  		case v == "mode16":
   374  			insn.Mode &= 1<<iset.ModeProt16 | 1<<iset.ModeReal16
   375  		case v == "eamode64",
   376  			v == "eamode32",
   377  			v == "eamode16",
   378  			v == "eanot16":
   379  
   380  		case v == "no_refining_prefix":
   381  			insn.NoRepPrefix = true
   382  			insn.No66Prefix = true
   383  		case v == "no66_prefix", v == "eosz32", v == "eosz64":
   384  			insn.No66Prefix = true
   385  		case v == "eosz16", v == "eosznot64", v == "REP!=3":
   386  			// TODO(dvyukov): this may have some effect on REP/66 prefixes,
   387  			// but this wasn't checked. These are just added here to unbreak build.
   388  		case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2":
   389  			insn.Prefix = append(insn.Prefix, 0xF2)
   390  			insn.NoRepPrefix = true
   391  		case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3":
   392  			insn.Prefix = append(insn.Prefix, 0xF3)
   393  			insn.NoRepPrefix = true
   394  		case v == "norep", v == "not_refining", v == "REP=0":
   395  			insn.NoRepPrefix = true
   396  		case v == "osz_refining_prefix":
   397  			insn.Prefix = append(insn.Prefix, 0x66)
   398  			insn.NoRepPrefix = true
   399  		case v == "rexw_prefix", v == "W1":
   400  			insn.Rexw = 1
   401  		case v == "norexw_prefix", v == "W0":
   402  			insn.Rexw = -1
   403  		case v == "MPXMODE=1",
   404  			v == "MPXMODE=0",
   405  			v == "TZCNT=1",
   406  			v == "TZCNT=0",
   407  			v == "LZCNT=1",
   408  			v == "LZCNT=0",
   409  			v == "CR_WIDTH()",
   410  			v == "DF64()",
   411  			v == "IMMUNE_REXW()",
   412  			v == "FORCE64()",
   413  			v == "EOSZ=1",
   414  			v == "EOSZ!=1",
   415  			v == "EOSZ=2",
   416  			v == "EOSZ!=2",
   417  			v == "EOSZ=3",
   418  			v == "EOSZ!=3",
   419  			v == "BRANCH_HINT()",
   420  			v == "P4=1",
   421  			v == "P4=0",
   422  			v == "rexb_prefix",
   423  			v == "norexb_prefix",
   424  			v == "IMMUNE66()",
   425  			v == "REFINING66()",
   426  			v == "IGNORE66()",
   427  			v == "IMMUNE66_LOOP64()",
   428  			v == "OVERRIDE_SEG0()",
   429  			v == "OVERRIDE_SEG1()",
   430  			v == "REMOVE_SEGMENT()",
   431  			v == "ONE()",
   432  			v == "nolock_prefix",
   433  			v == "MODRM()",
   434  			v == "VMODRM_XMM()",
   435  			v == "VMODRM_YMM()",
   436  			v == "BCRC=0",
   437  			v == "BCRC=1",
   438  			v == "ESIZE_8_BITS()",
   439  			v == "ESIZE_16_BITS()",
   440  			v == "ESIZE_32_BITS()",
   441  			v == "ESIZE_64_BITS()",
   442  			v == "ESIZE_128_BITS()",
   443  			v == "NELEM_GPR_WRITER_STORE()",
   444  			v == "NELEM_GPR_WRITER_STORE_BYTE()",
   445  			v == "NELEM_GPR_WRITER_STORE_WORD()",
   446  			v == "NELEM_GPR_WRITER_LDOP_Q()",
   447  			v == "NELEM_GPR_WRITER_LDOP_D()",
   448  			v == "NELEM_GPR_READER()",
   449  			v == "NELEM_GPR_READER_BYTE()",
   450  			v == "NELEM_GPR_READER_WORD()",
   451  			v == "NELEM_GSCAT()",
   452  			v == "NELEM_HALF()",
   453  			v == "NELEM_FULL()",
   454  			v == "NELEM_FULLMEM()",
   455  			v == "NELEM_QUARTERMEM()",
   456  			v == "NELEM_EIGHTHMEM()",
   457  			v == "NELEM_HALFMEM()",
   458  			v == "NELEM_MEM128()",
   459  			v == "NELEM_SCALAR()",
   460  			v == "NELEM_TUPLE1()",
   461  			v == "NELEM_TUPLE2()",
   462  			v == "NELEM_TUPLE4()",
   463  			v == "NELEM_TUPLE8()",
   464  			v == "NELEM_TUPLE1_4X()",
   465  			v == "NELEM_TUPLE1_BYTE()",
   466  			v == "NELEM_TUPLE1_WORD()",
   467  			v == "NELEM_MOVDDUP()",
   468  			v == "UISA_VMODRM_XMM()",
   469  			v == "UISA_VMODRM_YMM()",
   470  			v == "UISA_VMODRM_ZMM()",
   471  			v == "MASK=0",
   472  			v == "FIX_ROUND_LEN128()",
   473  			v == "FIX_ROUND_LEN512()",
   474  			v == "AVX512_ROUND()",
   475  			v == "ZEROING=0",
   476  			v == "SAE()",
   477  			v == "VL512", // VL=2
   478  			v == "not_refining_f3",
   479  			v == "EVEXRR_ONE",
   480  			v == "CET=0",
   481  			v == "CET=1",
   482  			v == "WBNOINVD=0",
   483  			v == "WBNOINVD=1",
   484  			v == "CLDEMOTE=0",
   485  			v == "CLDEMOTE=1",
   486  			strings.HasPrefix(v, "MODEP5="):
   487  		default:
   488  			return errSkip(fmt.Sprintf("unknown pattern %v", v))
   489  		}
   490  	}
   491  	if insn.Modrm {
   492  		switch insn.Mod {
   493  		case -3, -1, 0, 1, 2, 3:
   494  		default:
   495  			return fmt.Errorf("bad MOD value: %v", insn.Mod)
   496  		}
   497  		if insn.Reg < -1 || insn.Reg > 7 {
   498  			return fmt.Errorf("bad REG value: %v", insn.Mod)
   499  		}
   500  		if insn.Rm < -1 || insn.Rm > 7 {
   501  			return fmt.Errorf("bad RM value: %v", insn.Mod)
   502  		}
   503  	}
   504  	if insn.Imm != 0 && len(insn.Suffix) != 0 {
   505  		return fmt.Errorf("both immediate and suffix opcode")
   506  	}
   507  	if insn.Mode == 0 {
   508  		return errSkip("no modes for instruction")
   509  	}
   510  	return nil
   511  }
   512  
   513  func parseOperands(insn *x86.Insn, vals []string) error {
   514  	for _, v := range vals {
   515  		switch v {
   516  		case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w":
   517  			if insn.Reg != -1 {
   518  				return fmt.Errorf("REG=SEG() operand, but fixed reg")
   519  			}
   520  			insn.Reg = -6
   521  		case "REG0=CR_R():w", "REG1=CR_R():r":
   522  			if insn.Reg != -1 {
   523  				return fmt.Errorf("REG=CR_R() operand, but fixed reg")
   524  			}
   525  			insn.Reg = -8
   526  			insn.NoSibDisp = true
   527  		case "REG0=DR_R():w", "REG1=DR_R():r":
   528  			insn.NoSibDisp = true
   529  		case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int":
   530  			insn.Mem16 = true
   531  		case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int":
   532  			insn.Mem32 = true
   533  		}
   534  	}
   535  	return nil
   536  }
   537  
   538  func parseModrm(v string) (int8, error) {
   539  	if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' {
   540  		return 0, fmt.Errorf("malformed")
   541  	}
   542  	if v == "[mm]" || v == "[rrr]" || v == "[nnn]" {
   543  		return -1, nil
   544  	}
   545  	if !strings.HasPrefix(v, "[0b") {
   546  		return 0, fmt.Errorf("malformed")
   547  	}
   548  	var vv int8
   549  	for i := 3; i < len(v)-1; i++ {
   550  		if v[i] != '0' && v[i] != '1' {
   551  			return 0, fmt.Errorf("malformed")
   552  		}
   553  		vv *= 2
   554  		if v[i] == '1' {
   555  			vv++
   556  		}
   557  	}
   558  	return vv, nil
   559  }
   560  
   561  func addImm(insn *x86.Insn, imm int8) {
   562  	if insn.Imm == 0 {
   563  		insn.Imm = imm
   564  		return
   565  	}
   566  	if insn.Imm2 == 0 {
   567  		insn.Imm2 = imm
   568  		return
   569  	}
   570  	panic("too many immediates")
   571  }