github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ifuzz/x86/gen/gen.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // gen generates instruction tables (ifuzz_types/insns.go) from Intel XED tables.
     5  // Tables used to generate insns.go are checked in in all-enc-instructions.txt.
     6  package main
     7  
     8  import (
     9  	"bufio"
    10  	"errors"
    11  	"fmt"
    12  	"os"
    13  	"reflect"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"github.com/google/syzkaller/pkg/ifuzz/iset"
    18  	"github.com/google/syzkaller/pkg/ifuzz/x86"
    19  	"github.com/google/syzkaller/pkg/serializer"
    20  	"github.com/google/syzkaller/pkg/tool"
    21  )
    22  
    23  // nolint: gocyclo, gocognit, funlen, dupl
    24  func main() {
    25  	if len(os.Args) != 2 {
    26  		tool.Failf("usage: gen instructions.txt")
    27  	}
    28  	f, err := os.Open(os.Args[1])
    29  	if err != nil {
    30  		tool.Failf("failed to open input file: %v", err)
    31  	}
    32  	defer f.Close()
    33  
    34  	skipped := 0
    35  	saved := ""
    36  	var insns []*x86.Insn
    37  	var insn, insn1 *x86.Insn
    38  	s := bufio.NewScanner(f)
    39  	for i := 1; s.Scan(); i++ {
    40  		reportError := func(msg string, args ...interface{}) {
    41  			fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text())
    42  			tool.Failf(msg, args...)
    43  		}
    44  		line := s.Text()
    45  		if comment := strings.IndexByte(line, '#'); comment != -1 {
    46  			line = line[:comment]
    47  		}
    48  		line = strings.TrimSpace(line)
    49  		if line == "" {
    50  			continue
    51  		}
    52  		if line[len(line)-1] == '\\' {
    53  			saved += line[:len(line)-1]
    54  			continue
    55  		}
    56  		line = saved + line
    57  		saved = ""
    58  		if line == "{" {
    59  			insn = new(x86.Insn)
    60  			continue
    61  		}
    62  		if line == "}" {
    63  			if insn1 != nil {
    64  				insns = append(insns, insn1)
    65  				insn1 = nil
    66  				insn = nil
    67  			}
    68  			continue
    69  		}
    70  		colon := strings.IndexByte(line, ':')
    71  		if colon == -1 {
    72  			reportError("no colon")
    73  		}
    74  		name := strings.TrimSpace(line[:colon])
    75  		if name == "" {
    76  			reportError("empty attribute name")
    77  		}
    78  		var vals []string
    79  		for _, v := range strings.Split(line[colon+1:], " ") {
    80  			v = strings.TrimSpace(v)
    81  			if v == "" {
    82  				continue
    83  			}
    84  			vals = append(vals, v)
    85  		}
    86  		switch name {
    87  		case "ICLASS":
    88  			if len(vals) != 1 {
    89  				reportError("ICLASS has more than one value")
    90  			}
    91  			insn.Name = vals[0]
    92  		case "CPL":
    93  			if len(vals) != 1 {
    94  				reportError("CPL has more than one value")
    95  			}
    96  			if vals[0] != "0" && vals[0] != "3" {
    97  				reportError("unknown CPL value: %v", vals[0])
    98  			}
    99  			insn.Priv = vals[0] == "0"
   100  		case "EXTENSION":
   101  			if len(vals) != 1 {
   102  				reportError("EXTENSION has more than one value")
   103  			}
   104  			insn.Extension = vals[0]
   105  			switch insn.Extension {
   106  			case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER":
   107  				insn.Mode = 1<<iset.ModeLong64 | 1<<iset.ModeProt32
   108  			}
   109  			insn.Avx2Gather = insn.Extension == "AVX2GATHER"
   110  		case "PATTERN":
   111  			if insn1 != nil {
   112  				insns = append(insns, insn1)
   113  			}
   114  			insn1 = new(x86.Insn)
   115  			*insn1 = *insn
   116  			if err := parsePattern(insn1, vals); err != nil {
   117  				var errSkip errSkip
   118  				if !errors.As(err, &errSkip) {
   119  					reportError(errSkip.Error())
   120  				}
   121  				if err.Error() != "" {
   122  					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
   123  				}
   124  				skipped++
   125  				insn1 = nil
   126  			}
   127  		case "OPERANDS":
   128  			if insn1 == nil {
   129  				break
   130  			}
   131  			if err := parseOperands(insn1, vals); err != nil {
   132  				var errSkip errSkip
   133  				if !errors.As(err, &errSkip) {
   134  					reportError(errSkip.Error())
   135  				}
   136  				if err.Error() != "" {
   137  					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
   138  				}
   139  				skipped++
   140  				insn1 = nil
   141  			}
   142  		}
   143  	}
   144  
   145  	var deduped []*x86.Insn
   146  nextInsn:
   147  	for _, insn := range insns {
   148  		if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" {
   149  			skipped++
   150  			continue
   151  		}
   152  		mod0 := insn.Mod
   153  		for j := len(deduped) - 1; j >= 0; j-- {
   154  			insn1 := deduped[j]
   155  			if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 {
   156  				insn.Mod = insn1.Mod
   157  			}
   158  			if reflect.DeepEqual(insn, insn1) {
   159  				if insn.Mod != mod0 {
   160  					insn1.Mod = -1
   161  				}
   162  				continue nextInsn
   163  			}
   164  			insn.Mod = mod0
   165  		}
   166  		deduped = append(deduped, insn)
   167  	}
   168  	fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped))
   169  	insns = deduped
   170  
   171  	fmt.Printf(`
   172  // Code generated by pkg/ifuzz/gen. DO NOT EDIT.
   173  
   174  // +build !codeanalysis
   175  
   176  package x86
   177  
   178  import "github.com/google/syzkaller/pkg/ifuzz/x86"
   179  
   180  func init() {
   181  	x86.Register(insns_x86)
   182  }
   183  
   184  var insns_x86 = []*Insn{
   185  `)
   186  	serializer.Write(os.Stdout, insns)
   187  
   188  	fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped)
   189  }
   190  
   191  type errSkip string
   192  
   193  func (err errSkip) Error() string {
   194  	return string(err)
   195  }
   196  
   197  // nolint: gocyclo, gocognit, funlen
   198  func parsePattern(insn *x86.Insn, vals []string) error {
   199  	if insn.Opcode != nil {
   200  		return fmt.Errorf("PATTERN is already parsed for the instruction")
   201  	}
   202  	// As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix.
   203  	if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" ||
   204  		insn.Name == "NOP8" || insn.Name == "NOP9" {
   205  		return errSkip("")
   206  	}
   207  	if insn.Mode == 0 {
   208  		insn.Mode = 1<<iset.ModeLast - 1
   209  	}
   210  	insn.Mod = -100
   211  	insn.Reg = -100
   212  	insn.Rm = -100
   213  	insn.VexP = -1
   214  	for _, v := range vals {
   215  		switch {
   216  		case strings.HasPrefix(v, "0x"):
   217  			op, err := strconv.ParseUint(v, 0, 8)
   218  			if err != nil {
   219  				return fmt.Errorf("failed to parse hex pattern: %v", v)
   220  			}
   221  			if !insn.Modrm {
   222  				insn.Opcode = append(insn.Opcode, byte(op))
   223  			} else {
   224  				insn.Suffix = append(insn.Suffix, byte(op))
   225  			}
   226  		case strings.HasPrefix(v, "0b"):
   227  			if len(v) != 8 || v[6] != '_' {
   228  				return fmt.Errorf("failed to parse bin pattern: %v", v)
   229  			}
   230  			var op byte
   231  			if v[2] == '1' {
   232  				op |= 1 << 7
   233  			}
   234  			if v[3] == '1' {
   235  				op |= 1 << 6
   236  			}
   237  			if v[4] == '1' {
   238  				op |= 1 << 5
   239  			}
   240  			if v[5] == '1' {
   241  				op |= 1 << 4
   242  			}
   243  			if v[7] == '1' {
   244  				op |= 1 << 3
   245  			}
   246  			insn.Opcode = append(insn.Opcode, op)
   247  		case strings.HasPrefix(v, "MOD["):
   248  			insn.Modrm = true
   249  			vv, err := parseModrm(v[3:])
   250  			if err != nil {
   251  				return fmt.Errorf("failed to parse %v: %w", v, err)
   252  			}
   253  			insn.Mod = vv
   254  		case strings.HasPrefix(v, "REG["):
   255  			insn.Modrm = true
   256  			vv, err := parseModrm(v[3:])
   257  			if err != nil {
   258  				return fmt.Errorf("failed to parse %v: %w", v, err)
   259  			}
   260  			insn.Reg = vv
   261  		case strings.HasPrefix(v, "RM["):
   262  			insn.Modrm = true
   263  			vv, err := parseModrm(v[2:])
   264  			if err != nil {
   265  				return fmt.Errorf("failed to parse %v: %w", v, err)
   266  			}
   267  			insn.Rm = vv
   268  		case v == "RM=4":
   269  			insn.Rm = 4
   270  		case strings.HasPrefix(v, "SRM["):
   271  			vv, err := parseModrm(v[3:])
   272  			if err != nil {
   273  				return fmt.Errorf("failed to parse %v: %w", v, err)
   274  			}
   275  			insn.Rm = vv
   276  			insn.Srm = true
   277  		case v == "SRM=0", v == "SRM!=0":
   278  		case v == "MOD!=3":
   279  			if !insn.Modrm || insn.Mod != -1 {
   280  				return fmt.Errorf("MOD!=3 without MOD")
   281  			}
   282  			insn.Mod = -3
   283  		case v == "MOD=3":
   284  			// Most other instructions contain "MOD[0b11] MOD=3",
   285  			// but BNDCL contains "MOD[mm] MOD=3"
   286  			insn.Mod = 3
   287  		case v == "MOD=0":
   288  			insn.Mod = 0
   289  		case v == "MOD=1":
   290  			insn.Mod = 1
   291  		case v == "MOD=2":
   292  			insn.Mod = 2
   293  		case v == "lock_prefix":
   294  			insn.Prefix = append(insn.Prefix, 0xF0)
   295  
   296  		// Immediates.
   297  		case v == "UIMM8()", v == "SIMM8()":
   298  			addImm(insn, 1)
   299  		case v == "UIMM16()":
   300  			addImm(insn, 2)
   301  		case v == "UIMM32()":
   302  			addImm(insn, 4)
   303  		case v == "SIMMz()":
   304  			addImm(insn, -1)
   305  		case v == "UIMMv()":
   306  			addImm(insn, -3)
   307  		case v == "UIMM8_1()":
   308  			addImm(insn, 1)
   309  		case v == "BRDISP8()":
   310  			addImm(insn, 1)
   311  		case v == "BRDISP32()":
   312  			addImm(insn, 4)
   313  		case v == "BRDISPz()":
   314  			addImm(insn, -1)
   315  		case v == "MEMDISPv()":
   316  			addImm(insn, -2)
   317  
   318  		// VOP/VEX
   319  		case v == "XOPV":
   320  			insn.Vex = 0x8f
   321  			insn.Mode &^= 1 << iset.ModeReal16
   322  		case v == "EVV":
   323  			insn.Vex = 0xc4
   324  		case v == "VV1":
   325  			insn.Vex = 0xc4
   326  		case v == "VMAP0":
   327  			insn.VexMap = 0
   328  		case v == "V0F":
   329  			insn.VexMap = 1
   330  		case v == "V0F38":
   331  			insn.VexMap = 2
   332  		case v == "V0F3A":
   333  			insn.VexMap = 3
   334  		case v == "XMAP8":
   335  			insn.VexMap = 8
   336  		case v == "XMAP9":
   337  			insn.VexMap = 9
   338  		case v == "XMAPA":
   339  			insn.VexMap = 10
   340  		case v == "VNP":
   341  			insn.VexP = 0
   342  		case v == "V66":
   343  			insn.VexP = 1
   344  		case v == "VF2":
   345  			insn.VexP = 3
   346  		case v == "VF3":
   347  			insn.VexP = 2
   348  		case v == "VL128", v == "VL=0":
   349  			insn.VexL = -1
   350  		case v == "VL256", v == "VL=1":
   351  			insn.VexL = 1
   352  		case v == "NOVSR":
   353  			insn.VexNoR = true
   354  		case v == "NOEVSR":
   355  			insn.VexNoR = true
   356  			// VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0
   357  		case v == "SE_IMM8()":
   358  			addImm(insn, 1)
   359  
   360  		// Modes.
   361  		case v == "mode64":
   362  			insn.Mode &= 1 << iset.ModeLong64
   363  		case v == "not64":
   364  			insn.Mode &^= 1 << iset.ModeLong64
   365  		case v == "mode32":
   366  			insn.Mode &= 1 << iset.ModeProt32
   367  		case v == "mode16":
   368  			insn.Mode &= 1<<iset.ModeProt16 | 1<<iset.ModeReal16
   369  		case v == "eamode64",
   370  			v == "eamode32",
   371  			v == "eamode16",
   372  			v == "eanot16":
   373  
   374  		case v == "no_refining_prefix":
   375  			insn.NoRepPrefix = true
   376  			insn.No66Prefix = true
   377  		case v == "no66_prefix", v == "eosz32", v == "eosz64":
   378  			insn.No66Prefix = true
   379  		case v == "eosz16", v == "eosznot64", v == "REP!=3":
   380  			// TODO(dvyukov): this may have some effect on REP/66 prefixes,
   381  			// but this wasn't checked. These are just added here to unbreak build.
   382  		case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2":
   383  			insn.Prefix = append(insn.Prefix, 0xF2)
   384  			insn.NoRepPrefix = true
   385  		case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3":
   386  			insn.Prefix = append(insn.Prefix, 0xF3)
   387  			insn.NoRepPrefix = true
   388  		case v == "norep", v == "not_refining", v == "REP=0":
   389  			insn.NoRepPrefix = true
   390  		case v == "osz_refining_prefix":
   391  			insn.Prefix = append(insn.Prefix, 0x66)
   392  			insn.NoRepPrefix = true
   393  		case v == "rexw_prefix", v == "W1":
   394  			insn.Rexw = 1
   395  		case v == "norexw_prefix", v == "W0":
   396  			insn.Rexw = -1
   397  		case v == "MPXMODE=1",
   398  			v == "MPXMODE=0",
   399  			v == "TZCNT=1",
   400  			v == "TZCNT=0",
   401  			v == "LZCNT=1",
   402  			v == "LZCNT=0",
   403  			v == "CR_WIDTH()",
   404  			v == "DF64()",
   405  			v == "IMMUNE_REXW()",
   406  			v == "FORCE64()",
   407  			v == "EOSZ=1",
   408  			v == "EOSZ!=1",
   409  			v == "EOSZ=2",
   410  			v == "EOSZ!=2",
   411  			v == "EOSZ=3",
   412  			v == "EOSZ!=3",
   413  			v == "BRANCH_HINT()",
   414  			v == "P4=1",
   415  			v == "P4=0",
   416  			v == "rexb_prefix",
   417  			v == "norexb_prefix",
   418  			v == "IMMUNE66()",
   419  			v == "REFINING66()",
   420  			v == "IGNORE66()",
   421  			v == "IMMUNE66_LOOP64()",
   422  			v == "OVERRIDE_SEG0()",
   423  			v == "OVERRIDE_SEG1()",
   424  			v == "REMOVE_SEGMENT()",
   425  			v == "ONE()",
   426  			v == "nolock_prefix",
   427  			v == "MODRM()",
   428  			v == "VMODRM_XMM()",
   429  			v == "VMODRM_YMM()",
   430  			v == "BCRC=0",
   431  			v == "BCRC=1",
   432  			v == "ESIZE_8_BITS()",
   433  			v == "ESIZE_16_BITS()",
   434  			v == "ESIZE_32_BITS()",
   435  			v == "ESIZE_64_BITS()",
   436  			v == "ESIZE_128_BITS()",
   437  			v == "NELEM_GPR_WRITER_STORE()",
   438  			v == "NELEM_GPR_WRITER_STORE_BYTE()",
   439  			v == "NELEM_GPR_WRITER_STORE_WORD()",
   440  			v == "NELEM_GPR_WRITER_LDOP_Q()",
   441  			v == "NELEM_GPR_WRITER_LDOP_D()",
   442  			v == "NELEM_GPR_READER()",
   443  			v == "NELEM_GPR_READER_BYTE()",
   444  			v == "NELEM_GPR_READER_WORD()",
   445  			v == "NELEM_GSCAT()",
   446  			v == "NELEM_HALF()",
   447  			v == "NELEM_FULL()",
   448  			v == "NELEM_FULLMEM()",
   449  			v == "NELEM_QUARTERMEM()",
   450  			v == "NELEM_EIGHTHMEM()",
   451  			v == "NELEM_HALFMEM()",
   452  			v == "NELEM_MEM128()",
   453  			v == "NELEM_SCALAR()",
   454  			v == "NELEM_TUPLE1()",
   455  			v == "NELEM_TUPLE2()",
   456  			v == "NELEM_TUPLE4()",
   457  			v == "NELEM_TUPLE8()",
   458  			v == "NELEM_TUPLE1_4X()",
   459  			v == "NELEM_TUPLE1_BYTE()",
   460  			v == "NELEM_TUPLE1_WORD()",
   461  			v == "NELEM_MOVDDUP()",
   462  			v == "UISA_VMODRM_XMM()",
   463  			v == "UISA_VMODRM_YMM()",
   464  			v == "UISA_VMODRM_ZMM()",
   465  			v == "MASK=0",
   466  			v == "FIX_ROUND_LEN128()",
   467  			v == "FIX_ROUND_LEN512()",
   468  			v == "AVX512_ROUND()",
   469  			v == "ZEROING=0",
   470  			v == "SAE()",
   471  			v == "VL512", // VL=2
   472  			v == "not_refining_f3",
   473  			v == "EVEXRR_ONE",
   474  			v == "CET=0",
   475  			v == "CET=1",
   476  			v == "WBNOINVD=0",
   477  			v == "WBNOINVD=1",
   478  			v == "CLDEMOTE=0",
   479  			v == "CLDEMOTE=1",
   480  			strings.HasPrefix(v, "MODEP5="):
   481  		default:
   482  			return errSkip(fmt.Sprintf("unknown pattern %v", v))
   483  		}
   484  	}
   485  	if insn.Modrm {
   486  		switch insn.Mod {
   487  		case -3, -1, 0, 1, 2, 3:
   488  		default:
   489  			return fmt.Errorf("bad MOD value: %v", insn.Mod)
   490  		}
   491  		if insn.Reg < -1 || insn.Reg > 7 {
   492  			return fmt.Errorf("bad REG value: %v", insn.Mod)
   493  		}
   494  		if insn.Rm < -1 || insn.Rm > 7 {
   495  			return fmt.Errorf("bad RM value: %v", insn.Mod)
   496  		}
   497  	}
   498  	if insn.Imm != 0 && len(insn.Suffix) != 0 {
   499  		return fmt.Errorf("both immediate and suffix opcode")
   500  	}
   501  	if insn.Mode == 0 {
   502  		return errSkip("no modes for instruction")
   503  	}
   504  	return nil
   505  }
   506  
   507  func parseOperands(insn *x86.Insn, vals []string) error {
   508  	for _, v := range vals {
   509  		switch v {
   510  		case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w":
   511  			if insn.Reg != -1 {
   512  				return fmt.Errorf("REG=SEG() operand, but fixed reg")
   513  			}
   514  			insn.Reg = -6
   515  		case "REG0=CR_R():w", "REG1=CR_R():r":
   516  			if insn.Reg != -1 {
   517  				return fmt.Errorf("REG=CR_R() operand, but fixed reg")
   518  			}
   519  			insn.Reg = -8
   520  			insn.NoSibDisp = true
   521  		case "REG0=DR_R():w", "REG1=DR_R():r":
   522  			insn.NoSibDisp = true
   523  		case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int":
   524  			insn.Mem16 = true
   525  		case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int":
   526  			insn.Mem32 = true
   527  		}
   528  	}
   529  	return nil
   530  }
   531  
   532  func parseModrm(v string) (int8, error) {
   533  	if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' {
   534  		return 0, fmt.Errorf("malformed")
   535  	}
   536  	if v == "[mm]" || v == "[rrr]" || v == "[nnn]" {
   537  		return -1, nil
   538  	}
   539  	if !strings.HasPrefix(v, "[0b") {
   540  		return 0, fmt.Errorf("malformed")
   541  	}
   542  	var vv int8
   543  	for i := 3; i < len(v)-1; i++ {
   544  		if v[i] != '0' && v[i] != '1' {
   545  			return 0, fmt.Errorf("malformed")
   546  		}
   547  		vv *= 2
   548  		if v[i] == '1' {
   549  			vv++
   550  		}
   551  	}
   552  	return vv, nil
   553  }
   554  
   555  func addImm(insn *x86.Insn, imm int8) {
   556  	if insn.Imm == 0 {
   557  		insn.Imm = imm
   558  		return
   559  	}
   560  	if insn.Imm2 == 0 {
   561  		insn.Imm2 = imm
   562  		return
   563  	}
   564  	panic("too many immediates")
   565  }