github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-gen-x86-64.h

     1  /*
     2   * jit-gen-x86-64.h - Macros for generating x86_64 code.
     3   *
     4   * Copyright (C) 2008  Southern Storm Software, Pty Ltd.
     5   *
     6   * This file is part of the libjit library.
     7   *
     8   * The libjit library is free software: you can redistribute it and/or
     9   * modify it under the terms of the GNU Lesser General Public License
    10   * as published by the Free Software Foundation, either version 2.1 of
    11   * the License, or (at your option) any later version.
    12   *
    13   * The libjit library is distributed in the hope that it will be useful,
    14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    16   * Lesser General Public License for more details.
    17   *
    18   * You should have received a copy of the GNU Lesser General Public
    19   * License along with the libjit library.  If not, see
    20   * <http://www.gnu.org/licenses/>.
    21   */
    22  
    23  #ifndef	_JIT_GEN_X86_64_H
    24  #define	_JIT_GEN_X86_64_H
    25  
    26  #include <jit/jit-defs.h>
    27  #include "jit-gen-x86.h"
    28  
    29  #ifdef	__cplusplus
    30  extern	"C" {
    31  #endif
    32  
    33  /*
    34   * X86_64 64 bit general purpose integer registers.
    35   */
    36  typedef enum
    37  {
    38  	X86_64_RAX = 0,
    39  	X86_64_RCX = 1,
    40  	X86_64_RDX = 2,
    41  	X86_64_RBX = 3,
    42  	X86_64_RSP = 4,
    43  	X86_64_RBP = 5,
    44  	X86_64_RSI = 6,
    45  	X86_64_RDI = 7,
    46  	X86_64_R8  = 8,
    47  	X86_64_R9  = 9,
    48  	X86_64_R10 = 10,
    49  	X86_64_R11 = 11,
    50  	X86_64_R12 = 12,
    51  	X86_64_R13 = 13,
    52  	X86_64_R14 = 14,
    53  	X86_64_R15 = 15,
     54  	X86_64_RIP = 16		/* This register encoding doesn't exist in the */
     55  						/* instructions. It's used for RIP-relative addressing. */
    56  } X86_64_Reg_No;
    57  
    58  /*
    59   * X86-64 xmm registers.
    60   */
    61  typedef enum
    62  {
    63  	X86_64_XMM0 = 0,
    64  	X86_64_XMM1 = 1,
    65  	X86_64_XMM2 = 2,
    66  	X86_64_XMM3 = 3,
    67  	X86_64_XMM4 = 4,
    68  	X86_64_XMM5 = 5,
    69  	X86_64_XMM6 = 6,
    70  	X86_64_XMM7 = 7,
    71  	X86_64_XMM8 = 8,
    72  	X86_64_XMM9 = 9,
    73  	X86_64_XMM10 = 10,
    74  	X86_64_XMM11 = 11,
    75  	X86_64_XMM12 = 12,
    76  	X86_64_XMM13 = 13,
    77  	X86_64_XMM14 = 14,
    78  	X86_64_XMM15 = 15
    79  } X86_64_XMM_Reg_No;
    80  
    81  /*
    82   * Bits in the REX prefix byte.
    83   */
    84  typedef enum
    85  {
     86  	X86_64_REX_B = 1,	/* 1-bit (high) extension of the ModRM r/m field, */
    87  						/* SIB base field, or opcode reg field, thus */
    88  						/* permitting access to 16 registers. */
    89  	X86_64_REX_X = 2,	/* 1-bit (high) extension of the SIB index field */
    90  						/* thus permitting access to 16 registers. */
    91  	X86_64_REX_R = 4,	/* 1-bit (high) extension of the ModRM reg field, */
    92  						/* thus permitting access to 16 registers. */
    93  	X86_64_REX_W = 8	/* 0 = Default operand size */
    94  						/* 1 = 64 bit operand size */
    95  } X86_64_REX_Bits;
    96  
    97  /*
     98   * Third part of the opcodes for xmm instructions, which are encoded as follows:
    99   * Opcode1: 0xF3 (single precision) or 0xF2 (double precision)
   100   *          This is handled as a prefix.
   101   * Opcode2: 0x0F
   102   */
   103  typedef enum
   104  {
   105  	XMM1_MOV		= 0x10,
   106  	XMM1_MOV_REV	= 0x11,
   107  	XMM1_ADD		= 0x58,
   108  	XMM1_MUL		= 0x59,
   109  	XMM1_SUB		= 0x5C,
   110  	XMM1_DIV		= 0x5E
   111  } X86_64_XMM1_OP;
   112  
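/*
 * Example (illustrative, not part of the original header): combining
 * the pieces described above, XMM1_MOV with the 0xF3 prefix yields
 * F3 0F 10 ("movss"), and XMM1_ADD with the 0xF2 prefix yields
 * F2 0F 58 ("addsd").
 */
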
   113  /*
   114   * Logical opcodes used with packed single and double precision values.
   115   */
   116  typedef enum
   117  {
   118  	XMM_ANDP		= 0x54,
   119  	XMM_ORP			= 0x56,
   120  	XMM_XORP		= 0x57
   121  } X86_64_XMM_PLOP;
   122  
   123  /*
   124   * Rounding modes for xmm rounding instructions, the mxcsr register and
   125   * the fpu control word.
   126   */
   127  typedef enum
   128  {
   129  	X86_ROUND_NEAREST	= 0x00,		/* Round to the nearest integer */
   130  	X86_ROUND_DOWN		= 0x01,		/* Round towards negative infinity */
   131  	X86_ROUND_UP		= 0x02,		/* Round towards positive infinity */
   132  	X86_ROUND_ZERO		= 0x03		/* Round towards zero (truncate) */
   133  } X86_64_ROUNDMODE;
   134  
   135  /*
    136   * Helper union for emitting 64 bit immediate values.
   137   */
   138  typedef union
   139  {
   140  	jit_long val;
   141  	unsigned char b[8];
   142  } x86_64_imm_buf;
   143  
   144  #define x86_64_imm_emit64(inst, imm) \
   145  	do { \
   146  		x86_64_imm_buf imb; \
   147  		imb.val = (jit_long)(imm); \
   148  		*(inst)++ = imb.b[0]; \
   149  		*(inst)++ = imb.b[1]; \
   150  		*(inst)++ = imb.b[2]; \
   151  		*(inst)++ = imb.b[3]; \
   152  		*(inst)++ = imb.b[4]; \
   153  		*(inst)++ = imb.b[5]; \
   154  		*(inst)++ = imb.b[6]; \
   155  		*(inst)++ = imb.b[7]; \
   156  	} while (0)
   157  
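/*
 * Usage sketch (illustrative only, not part of the original header),
 * assuming inst points into a writable code buffer:
 *
 *	x86_64_imm_emit64(inst, 0x1122334455667788L);
 *
 * advances inst by 8 and, on a little-endian host (the relevant case
 * when generating x86-64 code in-process), stores the bytes
 * 88 77 66 55 44 33 22 11.
 */
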
   158  #define x86_64_imm_emit_max32(inst, imm, size) \
   159  	do { \
   160  		switch((size)) \
   161  		{ \
   162  			case 1: \
   163  			{ \
   164  				x86_imm_emit8(inst, (imm)); \
   165  			} \
   166  			break; \
   167  			case 2: \
   168  			{ \
   169  				x86_imm_emit16(inst, (imm)); \
   170  			} \
   171  			break; \
   172  			case 4: \
   173  			case 8: \
   174  			{ \
   175  				x86_imm_emit32((inst), (imm)); \
   176  			} \
   177  			break; \
   178  			default: \
   179  			{ \
   180  				jit_assert(0); \
   181  			} \
   182  		} \
   183  	} while(0)
   184  
   185  #define x86_64_imm_emit_max64(inst, imm, size) \
   186  	do { \
   187  		switch((size)) \
   188  		{ \
   189  			case 1: \
   190  			{ \
   191  				x86_imm_emit8(inst, (imm)); \
   192  			} \
   193  			break; \
   194  			case 2: \
   195  			{ \
   196  				x86_imm_emit16(inst, (imm)); \
   197  			} \
   198  			break; \
   199  			case 4: \
   200  			{ \
   201  				x86_imm_emit32((inst), (imm)); \
   202  			} \
   203  			break; \
   204  			case 8: \
   205  			{ \
   206  				x86_64_imm_emit64(inst, (imm)); \
   207  			} \
   208  			break; \
   209  			default: \
   210  			{ \
   211  				jit_assert(0); \
   212  			} \
   213  		} \
   214  	} while(0)
   215  
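/*
 * Note (added for clarity): x86_64_imm_emit_max32 serves instructions
 * whose 64 bit form takes a sign-extended 32 bit immediate, so size 8
 * still emits 4 bytes; x86_64_imm_emit_max64 emits the full 8 byte
 * immediate for size 8.
 */
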
   216  /*
    217   * Emit the REX prefix.
    218   * The natural size is a power of 2 (1, 2, 4 or 8).
    219   * For accessing the low byte registers DIL, SIL, BPL and SPL we have to
    220   * generate a REX prefix with the value 0x40 too.
    221   * To enable this, OR the natural size with 1.
   222   */
   223  #define x86_64_rex(rex_bits)	(0x40 | (rex_bits))
   224  #define x86_64_rex_emit(inst, width, modrm_reg, index_reg, rm_base_opcode_reg) \
   225  	do { \
   226  		unsigned char __rex_bits = \
   227  			(((width) & 8) ? X86_64_REX_W : 0) | \
   228  			(((modrm_reg) & 8) ? X86_64_REX_R : 0) | \
   229  			(((index_reg) & 8) ? X86_64_REX_X : 0) | \
   230  			(((rm_base_opcode_reg) & 8) ? X86_64_REX_B : 0); \
   231  		if((__rex_bits != 0)) \
   232  		{ \
   233  			 *(inst)++ = x86_64_rex(__rex_bits); \
   234  		} \
   235  		else if(((width) & 1) && ((modrm_reg & 4) || (rm_base_opcode_reg & 4))) \
   236  		{ \
   237  			 *(inst)++ = x86_64_rex(0); \
   238  		} \
   239  	} while(0)
   240  
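/*
 * Worked example (illustrative, not part of the original header): a
 * 64 bit operation with modrm reg X86_64_R9 and r/m X86_64_RAX needs
 * X86_64_REX_W | X86_64_REX_R, so
 *
 *	x86_64_rex_emit(inst, 8, X86_64_R9, 0, X86_64_RAX);
 *
 * stores x86_64_rex(0x0c) == 0x4c, while a 32 bit operation on the
 * low eight registers sets no bits and emits no prefix byte at all.
 */
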
   241  /*
    242   * Helper for emitting the REX prefix for opcodes with 64 bit default size.
   243   */
   244  #define x86_64_rex_emit64(inst, width, modrm_reg, index_reg, rm_base_opcode_reg) \
   245  	do { \
   246  		x86_64_rex_emit((inst), 0, (modrm_reg), (index_reg), (rm_base_opcode_reg)); \
   247  	} while(0)
   248  
   249  /* In 64 bit mode, all registers have a low byte subregister */
   250  #undef X86_IS_BYTE_REG
   251  #define X86_IS_BYTE_REG(reg) 1
   252  
   253  #define x86_64_reg_emit(inst, r, regno) \
   254  	do { \
   255  		x86_reg_emit((inst), ((r) & 0x7), ((regno) & 0x7)); \
   256  	} while(0)
   257  
   258  #define x86_64_mem_emit(inst, r, disp) \
   259  	do { \
   260  		x86_address_byte ((inst), 0, ((r) & 0x7), 4); \
   261  		x86_address_byte ((inst), 0, 4, 5); \
   262  		x86_imm_emit32((inst), (disp)); \
   263  	} while(0)
   264  
   265  #define x86_64_mem64_emit(inst, r, disp) \
   266  	do { \
   267  		x86_address_byte ((inst), 0, ((r) & 0x7), 4); \
   268  		x86_address_byte ((inst), 0, 4, 5); \
   269  		x86_64_imm_emit64((inst), (disp)); \
   270  	} while(0)
   271  
   272  #define x86_64_membase_emit(inst, reg, basereg, disp) \
   273  	do { \
   274  		if((basereg) == X86_64_RIP) \
   275  		{ \
   276  			x86_address_byte((inst), 0, ((reg) & 0x7), 5); \
   277  			x86_imm_emit32((inst), (disp)); \
   278  		} \
   279  		else \
   280  		{ \
   281  			x86_membase_emit((inst), ((reg) & 0x7), ((basereg) & 0x7), (disp)); \
   282  		} \
   283  	} while(0)
   284  
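/*
 * Example (illustrative): passing X86_64_RIP as the base register
 * selects the RIP-relative form, a ModRM byte with mod 00 and r/m 101
 * followed by a 32 bit displacement, so
 * x86_64_membase_emit(inst, 0, X86_64_RIP, 0) stores 05 00 00 00 00.
 */
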
   285  #define x86_64_memindex_emit(inst, r, basereg, disp, indexreg, shift) \
   286  	do { \
   287  		x86_memindex_emit((inst), ((r) & 0x7), ((basereg) & 0x7), (disp), ((indexreg) & 0x7), (shift)); \
   288  	} while(0)
   289  
   290  /*
    291   * RSP, RBP and the corresponding upper registers (R12 and R13) can't be used
    292   * for register-indirect addressing without displacement because their codes
    293   * are used for encoding addressing modes with displacement.
   294   * So we do a membase addressing in this case with a zero offset.
   295   */
   296  #define x86_64_regp_emit(inst, r, regno) \
   297  	do { \
   298  		switch(regno) \
   299  		{ \
   300  			case X86_64_RSP: \
   301  			case X86_64_RBP: \
   302  			case X86_64_R12: \
   303  			case X86_64_R13: \
   304  			{ \
   305  				x86_64_membase_emit((inst), (r), (regno), 0); \
   306  			} \
   307  			break; \
   308  			default: \
   309  			{ \
   310  				x86_address_byte((inst), 0, ((r) & 0x7), ((regno) & 0x7)); \
   311  			} \
   312  			break; \
   313  		} \
   314  	} while(0)
   315  
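/*
 * Example (illustrative): x86_64_regp_emit(inst, 0, X86_64_RSP) takes
 * the membase fallback described above and stores ModRM 04 plus the
 * SIB byte 24, i.e. (%rsp) with no displacement.
 */
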
   316  /*
   317   * Helper to encode an opcode where the encoding is different between
    318   * 8 bit and 16 ... 64 bit widths in the following way:
    319   * 8 bit = opcode given
    320   * 16 ... 64 bit = opcode given | 0x1
   321   */
   322  #define x86_64_opcode1_emit(inst, opc, size) \
   323  	do { \
   324  		switch ((size)) \
   325  		{ \
   326  			case 1: \
   327  			{ \
   328  				*(inst)++ = (unsigned char)(opc); \
   329  			} \
   330  			break;	\
   331  			case 2: \
   332  			case 4: \
   333  			case 8: \
   334  			{ \
   335  				*(inst)++ = ((unsigned char)(opc) | 0x1); \
   336  			} \
   337  			break;\
   338  			default: \
   339  			{ \
   340  				jit_assert(0); \
   341  			} \
   342  		} \
   343  	} while(0)
   344  
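/*
 * Example (illustrative): the dec macros further below pass 0xfe as
 * opc1, so x86_64_opcode1_emit(inst, 0xfe, 1) stores 0xfe while the
 * sizes 2, 4 and 8 store 0xfe | 0x1 == 0xff.
 */
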
   345  /*
    346   * Macros to implement the simple opcodes. The 16 bit cases emit the 0x66 prefix and deliberately fall through to the shared 32/64 bit encoding.
   347   */
   348  #define x86_64_alu_reg_reg_size(inst, opc, dreg, sreg, size) \
   349  	do { \
   350  		switch(size) \
   351  		{ \
   352  			case 1: \
   353  			{ \
   354  				x86_64_rex_emit(inst, size, (dreg), 0, (sreg)); \
   355  				*(inst)++ = (((unsigned char)(opc)) << 3) + 2; \
   356  				x86_64_reg_emit((inst), (dreg), (sreg)); \
   357  			} \
   358  			break; \
   359  			case 2: \
   360  			{ \
   361  				*(inst)++ = (unsigned char)0x66; \
   362  			} \
   363  			case 4: \
   364  			case 8: \
   365  			{ \
   366  				x86_64_rex_emit(inst, size, (dreg), 0, (sreg)); \
   367  				*(inst)++ = (((unsigned char)(opc)) << 3) + 3; \
   368  				x86_64_reg_emit((inst), (dreg), (sreg)); \
   369  			} \
   370  		} \
   371  	} while(0)
   372  
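/*
 * Worked example (illustrative, not part of the original header),
 * assuming inst points into a writable code buffer:
 *
 *	x86_64_alu_reg_reg_size(inst, 0, X86_64_RAX, X86_64_RCX, 8);
 *
 * emits 48 03 C1 ("addq %rcx, %rax"): REX.W, opcode (0 << 3) + 3 and
 * ModRM C1. With size 2 the macro emits the 0x66 operand size prefix
 * and falls through to the same encoding, giving 66 03 C1.
 */
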
   373  #define x86_64_alu_regp_reg_size(inst, opc, dregp, sreg, size) \
   374  	do { \
   375  		switch(size) \
   376  		{ \
   377  			case 1: \
   378  			{ \
   379  				x86_64_rex_emit(inst, size, (sreg), 0, (dregp)); \
   380  				*(inst)++ = (((unsigned char)(opc)) << 3); \
   381  				x86_64_regp_emit((inst), (sreg), (dregp));	\
   382  			} \
   383  			break; \
   384  			case 2: \
   385  			{ \
   386  				*(inst)++ = (unsigned char)0x66; \
   387  			} \
   388  			case 4: \
   389  			case 8: \
   390  			{ \
   391  				x86_64_rex_emit(inst, size, (sreg), 0, (dregp)); \
   392  				*(inst)++ = (((unsigned char)(opc)) << 3) + 1; \
   393  				x86_64_regp_emit((inst), (sreg), (dregp));	\
   394  			} \
   395  		} \
   396  	} while(0)
   397  
   398  #define x86_64_alu_mem_reg_size(inst, opc, mem, sreg, size) \
   399  	do { \
   400  		switch(size) \
   401  		{ \
   402  			case 1: \
   403  			{ \
   404  				x86_64_rex_emit(inst, size, (sreg), 0, 0); \
   405  				*(inst)++ = (((unsigned char)(opc)) << 3); \
   406  				x86_64_mem_emit((inst), (sreg), (mem));	\
   407  			} \
   408  			break; \
   409  			case 2: \
   410  			{ \
   411  				*(inst)++ = (unsigned char)0x66; \
   412  			} \
   413  			case 4: \
   414  			case 8: \
   415  			{ \
   416  				x86_64_rex_emit(inst, size, (sreg), 0, 0); \
   417  				*(inst)++ = (((unsigned char)(opc)) << 3) + 1; \
   418  				x86_64_mem_emit((inst), (sreg), (mem));	\
   419  			} \
   420  		} \
   421  	} while(0)
   422  
   423  #define x86_64_alu_membase_reg_size(inst, opc, basereg, disp, sreg, size) \
   424  	do { \
   425  		switch(size) \
   426  		{ \
   427  			case 1: \
   428  			{ \
   429  				x86_64_rex_emit(inst, size, (sreg), 0, (basereg)); \
   430  				*(inst)++ = (((unsigned char)(opc)) << 3); \
   431  				x86_64_membase_emit((inst), (sreg), (basereg), (disp)); \
   432  			} \
   433  			break; \
   434  			case 2: \
   435  			{ \
   436  				*(inst)++ = (unsigned char)0x66; \
   437  			} \
   438  			case 4: \
   439  			case 8: \
   440  			{ \
   441  				x86_64_rex_emit(inst, size, (sreg), 0, (basereg)); \
   442  				*(inst)++ = (((unsigned char)(opc)) << 3) + 1; \
   443  				x86_64_membase_emit((inst), (sreg), (basereg), (disp)); \
   444  			} \
   445  		} \
   446  	} while(0)
   447  
   448  #define x86_64_alu_memindex_reg_size(inst, opc, basereg, disp, indexreg, shift, sreg, size) \
   449  	do { \
   450  		switch(size) \
   451  		{ \
   452  			case 1: \
   453  			{ \
   454  				x86_64_rex_emit(inst, size, (sreg), (indexreg), (basereg)); \
   455  				*(inst)++ = (((unsigned char)(opc)) << 3); \
   456  				x86_64_memindex_emit((inst), (sreg), (basereg), (disp), (indexreg), (shift)); \
   457  			} \
   458  			break; \
   459  			case 2: \
   460  			{ \
   461  				*(inst)++ = (unsigned char)0x66; \
   462  			} \
   463  			case 4: \
   464  			case 8: \
   465  			{ \
   466  				x86_64_rex_emit(inst, size, (sreg), (indexreg), (basereg)); \
   467  				*(inst)++ = (((unsigned char)(opc)) << 3) + 1; \
   468  				x86_64_memindex_emit((inst), (sreg), (basereg), (disp), (indexreg), (shift)); \
   469  			} \
   470  		} \
   471  	} while(0)
   472  
   473  #define x86_64_alu_reg_regp_size(inst, opc, dreg, sregp, size) \
   474  	do { \
   475  		switch(size) \
   476  		{ \
   477  			case 1: \
   478  			{ \
   479  				x86_64_rex_emit(inst, size, (dreg), 0, (sregp)); \
   480  				*(inst)++ = (((unsigned char)(opc)) << 3) + 2; \
   481  				x86_64_regp_emit((inst), (dreg), (sregp));	\
   482  			} \
   483  			break; \
   484  			case 2: \
   485  			{ \
   486  				*(inst)++ = (unsigned char)0x66; \
   487  			} \
   488  			case 4: \
   489  			case 8: \
   490  			{ \
   491  				x86_64_rex_emit(inst, size, (dreg), 0, (sregp)); \
   492  				*(inst)++ = (((unsigned char)(opc)) << 3) + 3; \
   493  				x86_64_regp_emit((inst), (dreg), (sregp));	\
   494  			} \
   495  		} \
   496  	} while(0)
   497  
   498  #define x86_64_alu_reg_mem_size(inst, opc, dreg, mem, size) \
   499  	do { \
   500  		switch(size) \
   501  		{ \
   502  			case 1: \
   503  			{ \
   504  				x86_64_rex_emit(inst, size, (dreg), 0, 0); \
   505  				*(inst)++ = (((unsigned char)(opc)) << 3) + 2; \
   506  				x86_64_mem_emit((inst), (dreg), (mem));	\
   507  			} \
   508  			break; \
   509  			case 2: \
   510  			{ \
   511  				*(inst)++ = (unsigned char)0x66; \
   512  			} \
   513  			case 4: \
   514  			case 8: \
   515  			{ \
   516  				x86_64_rex_emit(inst, size, (dreg), 0, 0); \
   517  				*(inst)++ = (((unsigned char)(opc)) << 3) + 3; \
   518  				x86_64_mem_emit((inst), (dreg), (mem));	\
   519  			} \
   520  		} \
   521  	} while(0)
   522  
   523  #define x86_64_alu_reg_membase_size(inst, opc, dreg, basereg, disp, size) \
   524  	do { \
   525  		switch(size) \
   526  		{ \
   527  			case 1: \
   528  			{ \
   529  				x86_64_rex_emit(inst, size, (dreg), 0, (basereg)); \
   530  				*(inst)++ = (((unsigned char)(opc)) << 3) + 2; \
   531  				x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
   532  			} \
   533  			break; \
   534  			case 2: \
   535  			{ \
   536  				*(inst)++ = (unsigned char)0x66; \
   537  			} \
   538  			case 4: \
   539  			case 8: \
   540  			{ \
   541  				x86_64_rex_emit(inst, size, (dreg), 0, (basereg)); \
   542  				*(inst)++ = (((unsigned char)(opc)) << 3) + 3; \
   543  				x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
   544  			} \
   545  		} \
   546  	} while(0)
   547  
   548  #define x86_64_alu_reg_memindex_size(inst, opc, dreg, basereg, disp, indexreg, shift, size) \
   549  	do { \
   550  		switch(size) \
   551  		{ \
   552  			case 1: \
   553  			{ \
   554  				x86_64_rex_emit(inst, size, (dreg), (indexreg), (basereg)); \
   555  				*(inst)++ = (((unsigned char)(opc)) << 3) + 2; \
   556  				x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
   557  			} \
   558  			break; \
   559  			case 2: \
   560  			{ \
   561  				*(inst)++ = (unsigned char)0x66; \
   562  			} \
   563  			case 4: \
   564  			case 8: \
   565  			{ \
   566  				x86_64_rex_emit(inst, size, (dreg), (indexreg), (basereg)); \
   567  				*(inst)++ = (((unsigned char)(opc)) << 3) + 3; \
   568  				x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
   569  			} \
   570  		} \
   571  	} while(0)
   572  
   573  /*
   574   * The immediate value has to be at most 32 bit wide unless it can be sign
   575   * extended from a 8 bit or 32 bit wide value.
   576   */
   577  #define x86_64_alu_reg_imm_size(inst, opc, dreg, imm, size) \
   578  	do { \
   579  		if(x86_is_imm8((imm)) && ((size) != 1 || (dreg) != X86_64_RAX)) \
   580  		{ \
   581  			switch(size) \
   582  			{ \
   583  				case 1: \
   584  				{ \
   585  					x86_64_rex_emit(inst, size, 0, 0, (dreg)); \
   586  					*(inst)++ = (unsigned char)0x80; \
   587  				} \
   588  				break; \
   589  				case 2: \
   590  				{ \
   591  					*(inst)++ = (unsigned char)0x66; \
   592  				} \
   593  				case 4: \
   594  				case 8: \
   595  				{ \
   596  					x86_64_rex_emit(inst, size, 0, 0, (dreg)); \
   597  					*(inst)++ = (unsigned char)0x83; \
   598  				} \
   599  			} \
   600  			x86_64_reg_emit((inst), (opc), (dreg)); \
   601  			x86_imm_emit8((inst), (imm)); \
   602  		} \
   603  		else if((dreg) == X86_64_RAX) \
   604  		{ \
   605  			switch(size) \
   606  			{ \
   607  				case 1: \
   608  				{ \
   609  					*(inst)++ = (((unsigned char)(opc)) << 3) + 4; \
   610  					x86_imm_emit8((inst), (imm)); \
   611  				} \
   612  				break; \
   613  				case 2: \
   614  				{ \
   615  					*(inst)++ = (unsigned char)0x66; \
   616  					*(inst)++ = (((unsigned char)(opc)) << 3) + 5; \
   617  					x86_imm_emit16((inst), (imm)); \
   618  				} \
   619  				break; \
   620  				case 4: \
   621  				case 8: \
   622  				{ \
   623  					x86_64_rex_emit((inst), (size), 0, 0, 0); \
   624  					*(inst)++ = (((unsigned char)(opc)) << 3) + 5; \
   625  					x86_imm_emit32((inst), (imm)); \
   626  				} \
   627  			} \
   628  		} \
   629  		else \
   630  		{ \
   631  			switch(size) \
   632  			{ \
   633  				case 1: \
   634  				{ \
   635  					x86_64_rex_emit(inst, size, 0, 0, (dreg)); \
   636  					*(inst)++ = (unsigned char)0x80; \
   637  					x86_64_reg_emit((inst), (opc), (dreg)); \
   638  					x86_imm_emit8((inst), (imm)); \
    639  					jit_assert(1); /* no-op assert: an immediate wider than 8 bits is silently truncated */ \
   640  				} \
   641  				break; \
   642  				case 2: \
   643  				{ \
   644  					*(inst)++ = (unsigned char)0x66; \
   645  					x86_64_rex_emit(inst, size, 0, 0, (dreg)); \
   646  					*(inst)++ = (unsigned char)0x81; \
   647  					x86_64_reg_emit((inst), (opc), (dreg)); \
   648  					x86_imm_emit16((inst), (imm)); \
   649  				} \
   650  				break; \
   651  				case 4: \
   652  				case 8: \
   653  				{ \
   654  					x86_64_rex_emit(inst, size, 0, 0, (dreg)); \
   655  					*(inst)++ = (unsigned char)0x81; \
   656  					x86_64_reg_emit((inst), (opc), (dreg)); \
   657  					x86_imm_emit32((inst), (imm)); \
   658  				} \
   659  			} \
   660  		} \
   661  	} while(0)
   662  
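/*
 * Worked example (illustrative): with opc 0 (ADD),
 *
 *	x86_64_alu_reg_imm_size(inst, 0, X86_64_RCX, 4, 8);
 *
 * takes the sign-extended imm8 form and emits 48 83 C1 04
 * ("addq $4, %rcx"), while an immediate outside the imm8 range with
 * RAX as destination takes the accumulator short form, e.g.
 * 48 05 E8 03 00 00 for "addq $1000, %rax".
 */
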
   663  #define x86_64_alu_regp_imm_size(inst, opc, reg, imm, size) \
   664  	do { \
   665  		if(x86_is_imm8((imm))) \
   666  		{ \
   667  			switch(size) \
   668  			{ \
   669  				case 1: \
   670  				{ \
   671  					x86_64_rex_emit(inst, size, 0, 0, (reg)); \
   672  					*(inst)++ = (unsigned char)0x80; \
   673  				} \
   674  				break; \
   675  				case 2: \
   676  				{ \
   677  					*(inst)++ = (unsigned char)0x66; \
   678  				} \
   679  				case 4: \
   680  				case 8: \
   681  				{ \
   682  					x86_64_rex_emit(inst, size, 0, 0, (reg)); \
   683  					*(inst)++ = (unsigned char)0x83; \
   684  				} \
   685  			} \
   686  			x86_64_regp_emit((inst), (opc), (reg)); \
   687  			x86_imm_emit8((inst), (imm)); \
   688  		} \
   689  		else \
   690  		{ \
   691  			switch(size) \
   692  			{ \
   693  				case 1: \
   694  				{ \
   695  					x86_64_rex_emit(inst, size, 0, 0, (reg)); \
   696  					*(inst)++ = (unsigned char)0x80; \
   697  					x86_64_regp_emit((inst), (opc), (reg)); \
   698  					x86_imm_emit8((inst), (imm)); \
   699  					jit_assert(1); \
   700  				} \
   701  				break; \
   702  				case 2: \
   703  				{ \
   704  					*(inst)++ = (unsigned char)0x66; \
   705  					x86_64_rex_emit(inst, size, 0, 0, (reg)); \
   706  					*(inst)++ = (unsigned char)0x81; \
   707  					x86_64_regp_emit((inst), (opc), (reg)); \
   708  					x86_imm_emit16((inst), (imm)); \
   709  				} \
   710  				break; \
   711  				case 4: \
   712  				case 8: \
   713  				{ \
   714  					x86_64_rex_emit(inst, size, 0, 0, (reg)); \
   715  					*(inst)++ = (unsigned char)0x81; \
   716  					x86_64_regp_emit((inst), (opc), (reg)); \
   717  					x86_imm_emit32((inst), (imm)); \
   718  				} \
   719  			} \
   720  		} \
   721  	} while(0)
   722  
   723  #define x86_64_alu_mem_imm_size(inst, opc, mem, imm, size) \
   724  	do { \
   725  		if(x86_is_imm8((imm))) \
   726  		{ \
   727  			switch(size) \
   728  			{ \
   729  				case 1: \
   730  				{ \
   731  					x86_64_rex_emit((inst), (size), 0, 0, 0); \
   732  					*(inst)++ = (unsigned char)0x80; \
   733  				} \
   734  				break; \
   735  				case 2: \
   736  				{ \
   737  					*(inst)++ = (unsigned char)0x66; \
   738  				} \
   739  				case 4: \
   740  				case 8: \
   741  				{ \
   742  					x86_64_rex_emit((inst), (size), 0, 0, 0); \
   743  					*(inst)++ = (unsigned char)0x83; \
   744  				} \
   745  			} \
   746  			x86_64_mem_emit((inst), (opc), (mem));	\
   747  			x86_imm_emit8((inst), (imm));	\
   748  		} \
   749  		else \
   750  		{ \
   751  			switch(size) \
   752  			{ \
   753  				case 1: \
   754  				{ \
   755  					x86_64_rex_emit((inst), (size), 0, 0, 0); \
   756  					*(inst)++ = (unsigned char)0x80; \
   757  					x86_64_mem_emit((inst), (opc), (mem));	\
   758  					x86_imm_emit8((inst), (imm)); \
   759  					jit_assert(1); \
   760  				} \
   761  				break; \
   762  				case 2: \
   763  				{ \
   764  					*(inst)++ = (unsigned char)0x66; \
   765  					x86_64_rex_emit((inst), (size), 0, 0, 0); \
   766  					*(inst)++ = (unsigned char)0x81; \
   767  					x86_64_mem_emit((inst), (opc), (mem));	\
   768  					x86_imm_emit16((inst), (imm)); \
   769  				} \
   770  				break; \
   771  				case 4: \
   772  				case 8: \
   773  				{ \
   774  					x86_64_rex_emit((inst), (size), 0, 0, 0); \
   775  					*(inst)++ = (unsigned char)0x81; \
   776  					x86_64_mem_emit((inst), (opc), (mem));	\
   777  					x86_imm_emit32((inst), (imm)); \
   778  				} \
   779  			} \
   780  		} \
   781  	} while(0)
   782  
   783  #define x86_64_alu_membase_imm_size(inst, opc, basereg, disp, imm, size) \
   784  	do { \
   785  		if(x86_is_imm8((imm))) \
   786  		{ \
   787  			switch(size) \
   788  			{ \
   789  				case 1: \
   790  				{ \
   791  					x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
   792  					*(inst)++ = (unsigned char)0x80; \
   793  				} \
   794  				break; \
   795  				case 2: \
   796  				{ \
   797  					*(inst)++ = (unsigned char)0x66; \
   798  				} \
   799  				case 4: \
   800  				case 8: \
   801  				{ \
   802  					x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
   803  					*(inst)++ = (unsigned char)0x83; \
   804  				} \
   805  			} \
   806  			x86_64_membase_emit((inst), (opc), (basereg), (disp));	\
   807  			x86_imm_emit8((inst), (imm));	\
   808  		} \
   809  		else \
   810  		{ \
   811  			switch(size) \
   812  			{ \
   813  				case 1: \
   814  				{ \
   815  					x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
   816  					*(inst)++ = (unsigned char)0x80; \
   817  					x86_64_membase_emit((inst), (opc), (basereg), (disp));	\
   818  					x86_imm_emit8((inst), (imm)); \
   819  					jit_assert(1); \
   820  				} \
   821  				break; \
   822  				case 2: \
   823  				{ \
   824  					*(inst)++ = (unsigned char)0x66; \
   825  					x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
   826  					*(inst)++ = (unsigned char)0x81; \
   827  					x86_64_membase_emit((inst), (opc), (basereg), (disp));	\
   828  					x86_imm_emit16((inst), (imm)); \
   829  				} \
   830  				break; \
   831  				case 4: \
   832  				case 8: \
   833  				{ \
   834  					x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
   835  					*(inst)++ = (unsigned char)0x81; \
   836  					x86_64_membase_emit((inst), (opc), (basereg), (disp));	\
   837  					x86_imm_emit32((inst), (imm)); \
   838  				} \
   839  			} \
   840  		} \
   841  	} while(0)
   842  
   843  #define x86_64_alu_memindex_imm_size(inst, opc, basereg, disp, indexreg, shift, imm, size) \
   844  	do { \
   845  		if(x86_is_imm8((imm))) \
   846  		{ \
   847  			switch(size) \
   848  			{ \
   849  				case 1: \
   850  				{ \
   851  					x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
   852  					*(inst)++ = (unsigned char)0x80; \
   853  				} \
   854  				break; \
   855  				case 2: \
   856  				{ \
   857  					*(inst)++ = (unsigned char)0x66; \
   858  				} \
   859  				case 4: \
   860  				case 8: \
   861  				{ \
   862  					x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
   863  					*(inst)++ = (unsigned char)0x83; \
   864  				} \
   865  			} \
   866  			x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
   867  			x86_imm_emit8((inst), (imm)); \
   868  		} \
   869  		else \
   870  		{ \
   871  			switch(size) \
   872  			{ \
   873  				case 1: \
   874  				{ \
   875  					x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
   876  					*(inst)++ = (unsigned char)0x80; \
   877  					x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
   878  					x86_imm_emit8((inst), (imm)); \
   879  					jit_assert(1); \
   880  				} \
   881  				break; \
   882  				case 2: \
   883  				{ \
   884  					*(inst)++ = (unsigned char)0x66; \
   885  					x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
   886  					*(inst)++ = (unsigned char)0x81; \
   887  					x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
   888  					x86_imm_emit16((inst), (imm)); \
   889  				} \
   890  				break; \
   891  				case 4: \
   892  				case 8: \
   893  				{ \
   894  					x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
   895  					*(inst)++ = (unsigned char)0x81; \
   896  					x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
   897  					x86_imm_emit32((inst), (imm)); \
   898  				} \
   899  			} \
   900  		} \
   901  	} while(0)
   902  
   903  /*
   904   * Instructions with one opcode (plus optional r/m)
   905   */
   906  
   907  /*
   908   * Unary opcodes
   909   */
   910  #define x86_64_alu1_reg(inst, opc1, r, reg) \
   911  	do { \
   912  		x86_64_rex_emit((inst), 0, 0, 0, (reg)); \
   913  		*(inst)++ = (unsigned char)(opc1); \
   914  		x86_64_reg_emit((inst), (r), (reg));	\
   915  	} while(0)
   916  
   917  #define x86_64_alu1_regp(inst, opc1, r, regp) \
   918  	do { \
   919  		x86_64_rex_emit((inst), 0, 0, 0, (regp)); \
   920  		*(inst)++ = (unsigned char)(opc1); \
   921  		x86_64_regp_emit((inst), (r), (regp));	\
   922  	} while(0)
   923  
   924  #define x86_64_alu1_mem(inst, opc1, r, mem) \
   925  	do { \
   926  		*(inst)++ = (unsigned char)(opc1); \
   927  		x86_64_mem_emit((inst), (r), (mem)); \
   928  	} while(0)
   929  
   930  #define x86_64_alu1_membase(inst, opc1, r, basereg, disp) \
   931  	do { \
   932  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
   933  		*(inst)++ = (unsigned char)(opc1); \
   934  		x86_64_membase_emit((inst), (r), (basereg), (disp)); \
   935  	} while(0)
   936  
   937  #define x86_64_alu1_memindex(inst, opc1, r, basereg, disp, indexreg, shift) \
   938  	do { \
   939  		x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \
   940  		*(inst)++ = (unsigned char)(opc1); \
   941  		x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \
   942  	} while(0)
   943  
   944  #define x86_64_alu1_reg_size(inst, opc1, r, reg, size) \
   945  	do { \
   946  		if((size) == 2) \
   947  		{ \
   948  			*(inst)++ = (unsigned char)0x66; \
   949  		} \
   950  		x86_64_rex_emit((inst), (size), 0, 0, (reg)); \
   951  		x86_64_opcode1_emit((inst), (opc1), (size)); \
   952  		x86_64_reg_emit((inst), (r), (reg));	\
   953  	} while(0)
   954  
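/*
 * Worked example (illustrative): the dec macros near the end of this
 * file expand to
 *
 *	x86_64_alu1_reg_size(inst, 0xfe, 1, X86_64_RCX, 8);
 *
 * which emits 48 FF C9 ("decq %rcx"): REX.W, 0xfe | 0x1 == 0xff and a
 * ModRM byte carrying the /1 opcode extension in its reg field.
 */
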
   955  #define x86_64_alu1_regp_size(inst, opc1, r, regp, size) \
   956  	do { \
   957  		if((size) == 2) \
   958  		{ \
   959  			*(inst)++ = (unsigned char)0x66; \
   960  		} \
   961  		x86_64_rex_emit((inst), (size), 0, 0, (regp)); \
   962  		x86_64_opcode1_emit((inst), (opc1), (size)); \
   963  		x86_64_regp_emit((inst), (r), (regp));	\
   964  	} while(0)
   965  
   966  #define x86_64_alu1_mem_size(inst, opc1, r, mem, size) \
   967  	do { \
   968  		if((size) == 2) \
   969  		{ \
   970  			*(inst)++ = (unsigned char)0x66; \
   971  		} \
   972  		x86_64_rex_emit((inst), (size), 0, 0, 0); \
   973  		x86_64_opcode1_emit((inst), (opc1), (size)); \
   974  		x86_64_mem_emit((inst), (r), (mem)); \
   975  	} while(0)
   976  
   977  #define x86_64_alu1_membase_size(inst, opc1, r, basereg, disp, size) \
   978  	do { \
   979  		if((size) == 2) \
   980  		{ \
   981  			*(inst)++ = (unsigned char)0x66; \
   982  		} \
   983  		x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
   984  		x86_64_opcode1_emit((inst), (opc1), (size)); \
   985  		x86_64_membase_emit((inst), (r), (basereg), (disp)); \
   986  	} while(0)
   987  
   988  #define x86_64_alu1_memindex_size(inst, opc1, r, basereg, disp, indexreg, shift, size) \
   989  	do { \
   990  		if((size) == 2) \
   991  		{ \
   992  			*(inst)++ = (unsigned char)0x66; \
   993  		} \
   994  		x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
   995  		x86_64_opcode1_emit((inst), (opc1), (size)); \
   996  		x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \
   997  	} while(0)
   998  
   999  #define x86_64_alu1_reg_reg_size(inst, opc1, dreg, sreg, size) \
  1000  	do { \
  1001  		if((size) == 2) \
  1002  		{ \
  1003  			*(inst)++ = (unsigned char)0x66; \
  1004  		} \
  1005  		x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \
  1006  		*(inst)++ = (unsigned char)(opc1); \
  1007  		x86_64_reg_emit((inst), (dreg), (sreg));	\
  1008  	} while(0)
  1009  
  1010  #define x86_64_alu1_reg_regp_size(inst, opc1, dreg, sregp, size) \
  1011  	do { \
  1012  		if((size) == 2) \
  1013  		{ \
  1014  			*(inst)++ = (unsigned char)0x66; \
  1015  		} \
  1016  		x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \
  1017  		*(inst)++ = (unsigned char)(opc1); \
  1018  		x86_64_regp_emit((inst), (dreg), (sregp));	\
  1019  	} while(0)
  1020  
  1021  #define x86_64_alu1_reg_mem_size(inst, opc1, dreg, mem, size) \
  1022  	do { \
  1023  		if((size) == 2) \
  1024  		{ \
  1025  			*(inst)++ = (unsigned char)0x66; \
  1026  		} \
  1027  		x86_64_rex_emit((inst), (size), (dreg), 0, 0); \
  1028  		*(inst)++ = (unsigned char)(opc1); \
  1029  		x86_64_mem_emit((inst), (dreg), (mem)); \
  1030  	} while(0)
  1031  
  1032  #define x86_64_alu1_reg_membase_size(inst, opc1, dreg, basereg, disp, size) \
  1033  	do { \
  1034  		if((size) == 2) \
  1035  		{ \
  1036  			*(inst)++ = (unsigned char)0x66; \
  1037  		} \
  1038  		x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \
  1039  		*(inst)++ = (unsigned char)(opc1); \
  1040  		x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  1041  	} while(0)
  1042  
  1043  #define x86_64_alu1_reg_memindex_size(inst, opc1, dreg, basereg, disp, indexreg, shift, size) \
  1044  	do { \
  1045  		if((size) == 2) \
  1046  		{ \
  1047  			*(inst)++ = (unsigned char)0x66; \
  1048  		} \
  1049  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  1050  		*(inst)++ = (unsigned char)(opc1); \
  1051  		x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  1052  	} while(0)
  1053  
  1054  #define x86_64_alu2_reg_reg_size(inst, opc1, opc2, dreg, sreg, size) \
  1055  	do { \
  1056  		if((size) == 2) \
  1057  		{ \
  1058  			*(inst)++ = (unsigned char)0x66; \
  1059  		} \
  1060  		x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \
  1061  		*(inst)++ = (unsigned char)(opc1); \
  1062  		*(inst)++ = (unsigned char)(opc2); \
  1063  		x86_64_reg_emit((inst), (dreg), (sreg));	\
  1064  	} while(0)
  1065  
  1066  #define x86_64_alu2_reg_regp_size(inst, opc1, opc2, dreg, sregp, size) \
  1067  	do { \
  1068  		if((size) == 2) \
  1069  		{ \
  1070  			*(inst)++ = (unsigned char)0x66; \
  1071  		} \
  1072  		x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \
  1073  		*(inst)++ = (unsigned char)(opc1); \
  1074  		*(inst)++ = (unsigned char)(opc2); \
  1075  		x86_64_regp_emit((inst), (dreg), (sregp));	\
  1076  	} while(0)
  1077  
  1078  #define x86_64_alu2_reg_mem_size(inst, opc1, opc2, dreg, mem, size) \
  1079  	do { \
  1080  		if((size) == 2) \
  1081  		{ \
  1082  			*(inst)++ = (unsigned char)0x66; \
  1083  		} \
  1084  		x86_64_rex_emit((inst), (size), (dreg), 0, 0); \
  1085  		*(inst)++ = (unsigned char)(opc1); \
  1086  		*(inst)++ = (unsigned char)(opc2); \
  1087  		x86_64_mem_emit((inst), (dreg), (mem)); \
  1088  	} while(0)
  1089  
  1090  #define x86_64_alu2_reg_membase_size(inst, opc1, opc2, dreg, basereg, disp, size) \
  1091  	do { \
  1092  		if((size) == 2) \
  1093  		{ \
  1094  			*(inst)++ = (unsigned char)0x66; \
  1095  		} \
  1096  		x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \
  1097  		*(inst)++ = (unsigned char)(opc1); \
  1098  		*(inst)++ = (unsigned char)(opc2); \
  1099  		x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  1100  	} while(0)
  1101  
  1102  #define x86_64_alu2_reg_memindex_size(inst, opc1, opc2, dreg, basereg, disp, indexreg, shift, size) \
  1103  	do { \
  1104  		if((size) == 2) \
  1105  		{ \
  1106  			*(inst)++ = (unsigned char)0x66; \
  1107  		} \
  1108  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  1109  		*(inst)++ = (unsigned char)(opc1); \
  1110  		*(inst)++ = (unsigned char)(opc2); \
  1111  		x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  1112  	} while(0)
  1113  
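/*
 * Example (illustrative): the two-opcode variants fit the 0x0F escape
 * instructions; x86_64_alu2_reg_reg_size(inst, 0x0F, 0xAF, X86_64_RAX,
 * X86_64_RCX, 8) emits 48 0F AF C1 ("imulq %rcx, %rax").
 */
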
  1114  /*
   1115   * Group1 general instructions (opc: 0=ADD, 1=OR, 2=ADC, 3=SBB, 4=AND, 5=SUB, 6=XOR, 7=CMP)
  1116   */
  1117  #define x86_64_alu_reg_reg(inst, opc, dreg, sreg) \
  1118  	do { \
  1119  		x86_64_alu_reg_reg_size((inst), (opc), (dreg), (sreg), 8); \
  1120  	} while(0)
  1121  
  1122  #define x86_64_alu_reg_imm(inst, opc, dreg, imm) \
  1123  	do { \
  1124  		x86_64_alu_reg_imm_size((inst), (opc), (dreg), (imm), 8); \
  1125  	} while(0)
  1126  
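/*
 * Example (illustrative): with the group 1 opc values noted above,
 * x86_64_alu_reg_reg(inst, 6, X86_64_RAX, X86_64_RAX) emits the
 * zeroing idiom 48 33 C0 ("xorq %rax, %rax").
 */
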
  1127  /*
  1128   * ADC: Add with carry
  1129   */
  1130  #define x86_64_adc_reg_reg_size(inst, dreg, sreg, size) \
  1131  	do { \
  1132  		x86_64_alu_reg_reg_size((inst), 2, (dreg), (sreg), (size)); \
  1133  	} while(0)
  1134  
  1135  #define x86_64_adc_regp_reg_size(inst, dregp, sreg, size) \
  1136  	do { \
  1137  		x86_64_alu_regp_reg_size((inst), 2, (dregp), (sreg), (size)); \
  1138  	} while(0)
  1139  
  1140  #define x86_64_adc_mem_reg_size(inst, mem, sreg, size) \
  1141  	do { \
  1142  		x86_64_alu_mem_reg_size((inst), 2, (mem), (sreg), (size)); \
  1143  	} while(0)
  1144  
  1145  #define x86_64_adc_membase_reg_size(inst, basereg, disp, sreg, size) \
  1146  	do { \
  1147  		x86_64_alu_membase_reg_size((inst), 2, (basereg), (disp), (sreg), (size)); \
  1148  	} while(0)
  1149  
  1150  #define x86_64_adc_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1151  	do { \
  1152  		x86_64_alu_memindex_reg_size((inst), 2, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1153  	} while(0)
  1154  
  1155  #define x86_64_adc_reg_regp_size(inst, dreg, sregp, size) \
  1156  	do { \
  1157  		x86_64_alu_reg_regp_size((inst), 2, (dreg), (sregp), (size)); \
  1158  	} while(0)
  1159  
  1160  #define x86_64_adc_reg_mem_size(inst, dreg, mem, size) \
  1161  	do { \
  1162  		x86_64_alu_reg_mem_size((inst), 2, (dreg), (mem), (size)); \
  1163  	} while(0)
  1164  
  1165  #define x86_64_adc_reg_membase_size(inst, dreg, basereg, disp, size) \
  1166  	do { \
  1167  		x86_64_alu_reg_membase_size((inst), 2, (dreg), (basereg), (disp), (size)); \
  1168  	} while(0)
  1169  
  1170  #define x86_64_adc_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1171  	do { \
  1172  		x86_64_alu_reg_memindex_size((inst), 2, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1173  	} while(0)
  1174  
  1175  #define x86_64_adc_reg_imm_size(inst, dreg, imm, size) \
  1176  	do { \
  1177  		x86_64_alu_reg_imm_size((inst), 2, (dreg), (imm), (size)); \
  1178  	} while(0)
  1179  
  1180  #define x86_64_adc_regp_imm_size(inst, reg, imm, size) \
  1181  	do { \
  1182  		x86_64_alu_regp_imm_size((inst), 2, (reg), (imm), (size)); \
  1183  	} while(0)
  1184  
  1185  #define x86_64_adc_mem_imm_size(inst, mem, imm, size) \
  1186  	do { \
  1187  		x86_64_alu_mem_imm_size(inst, 2, mem, imm, size); \
  1188  	} while(0)
  1189  
  1190  #define x86_64_adc_membase_imm_size(inst, basereg, disp, imm, size) \
  1191  	do { \
  1192  		x86_64_alu_membase_imm_size((inst), 2, (basereg), (disp), (imm), (size)); \
  1193  	} while(0)
  1194  
  1195  #define x86_64_adc_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1196  	do { \
  1197  		x86_64_alu_memindex_imm_size((inst), 2, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1198  	} while(0)
  1199  
  1200  /*
  1201   * ADD
  1202   */
  1203  #define x86_64_add_reg_reg_size(inst, dreg, sreg, size) \
  1204  	do { \
  1205  		x86_64_alu_reg_reg_size((inst), 0, (dreg), (sreg), (size)); \
  1206  	} while(0)
  1207  
  1208  #define x86_64_add_regp_reg_size(inst, dregp, sreg, size) \
  1209  	do { \
  1210  		x86_64_alu_regp_reg_size((inst), 0, (dregp), (sreg), (size)); \
  1211  	} while(0)
  1212  
  1213  #define x86_64_add_mem_reg_size(inst, mem, sreg, size) \
  1214  	do { \
  1215  		x86_64_alu_mem_reg_size((inst), 0, (mem), (sreg), (size)); \
  1216  	} while(0)
  1217  
  1218  #define x86_64_add_membase_reg_size(inst, basereg, disp, sreg, size) \
  1219  	do { \
  1220  		x86_64_alu_membase_reg_size((inst), 0, (basereg), (disp), (sreg), (size)); \
  1221  	} while(0)
  1222  
  1223  #define x86_64_add_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1224  	do { \
  1225  		x86_64_alu_memindex_reg_size((inst), 0, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1226  	} while(0)
  1227  
  1228  #define x86_64_add_reg_regp_size(inst, dreg, sregp, size) \
  1229  	do { \
  1230  		x86_64_alu_reg_regp_size((inst), 0, (dreg), (sregp), (size)); \
  1231  	} while(0)
  1232  
  1233  #define x86_64_add_reg_mem_size(inst, dreg, mem, size) \
  1234  	do { \
  1235  		x86_64_alu_reg_mem_size((inst), 0, (dreg), (mem), (size)); \
  1236  	} while(0)
  1237  
  1238  #define x86_64_add_reg_membase_size(inst, dreg, basereg, disp, size) \
  1239  	do { \
  1240  		x86_64_alu_reg_membase_size((inst), 0, (dreg), (basereg), (disp), (size)); \
  1241  	} while(0)
  1242  
  1243  #define x86_64_add_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1244  	do { \
  1245  		x86_64_alu_reg_memindex_size((inst), 0, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1246  	} while(0)
  1247  
  1248  #define x86_64_add_reg_imm_size(inst, dreg, imm, size) \
  1249  	do { \
  1250  		x86_64_alu_reg_imm_size((inst), 0, (dreg), (imm), (size)); \
  1251  	} while(0)
  1252  
  1253  #define x86_64_add_regp_imm_size(inst, reg, imm, size) \
  1254  	do { \
  1255  		x86_64_alu_regp_imm_size((inst), 0, (reg), (imm), (size)); \
  1256  	} while(0)
  1257  
  1258  #define x86_64_add_mem_imm_size(inst, mem, imm, size) \
  1259  	do { \
  1260  		x86_64_alu_mem_imm_size(inst, 0, mem, imm, size); \
  1261  	} while(0)
  1262  
  1263  #define x86_64_add_membase_imm_size(inst, basereg, disp, imm, size) \
  1264  	do { \
  1265  		x86_64_alu_membase_imm_size((inst), 0, (basereg), (disp), (imm), (size)); \
  1266  	} while(0)
  1267  
  1268  #define x86_64_add_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1269  	do { \
  1270  		x86_64_alu_memindex_imm_size((inst), 0, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1271  	} while(0)
  1272  
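/*
 * Worked example (illustrative): incrementing a stack slot,
 *
 *	x86_64_add_membase_imm_size(inst, X86_64_RSP, 8, 1, 8);
 *
 * emits 48 83 44 24 08 01 ("addq $1, 8(%rsp)"): REX.W, the imm8
 * group 1 opcode 0x83, ModRM 44, SIB 24, disp8 08 and imm8 01.
 */
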
  1273  /*
  1274   * AND
  1275   */
  1276  #define x86_64_and_reg_reg_size(inst, dreg, sreg, size) \
  1277  	do { \
  1278  		x86_64_alu_reg_reg_size((inst), 4, (dreg), (sreg), (size)); \
  1279  	} while(0)
  1280  
  1281  #define x86_64_and_regp_reg_size(inst, dregp, sreg, size) \
  1282  	do { \
  1283  		x86_64_alu_regp_reg_size((inst), 4, (dregp), (sreg), (size)); \
  1284  	} while(0)
  1285  
  1286  #define x86_64_and_mem_reg_size(inst, mem, sreg, size) \
  1287  	do { \
  1288  		x86_64_alu_mem_reg_size((inst), 4, (mem), (sreg), (size)); \
  1289  	} while(0)
  1290  
  1291  #define x86_64_and_membase_reg_size(inst, basereg, disp, sreg, size) \
  1292  	do { \
  1293  		x86_64_alu_membase_reg_size((inst), 4, (basereg), (disp), (sreg), (size)); \
  1294  	} while(0)
  1295  
  1296  #define x86_64_and_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1297  	do { \
  1298  		x86_64_alu_memindex_reg_size((inst), 4, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1299  	} while(0)
  1300  
  1301  #define x86_64_and_reg_regp_size(inst, dreg, sregp, size) \
  1302  	do { \
  1303  		x86_64_alu_reg_regp_size((inst), 4, (dreg), (sregp), (size)); \
  1304  	} while(0)
  1305  
  1306  #define x86_64_and_reg_mem_size(inst, dreg, mem, size) \
  1307  	do { \
  1308  		x86_64_alu_reg_mem_size((inst), 4, (dreg), (mem), (size)); \
  1309  	} while(0)
  1310  
  1311  #define x86_64_and_reg_membase_size(inst, dreg, basereg, disp, size) \
  1312  	do { \
  1313  		x86_64_alu_reg_membase_size((inst), 4, (dreg), (basereg), (disp), (size)); \
  1314  	} while(0)
  1315  
  1316  #define x86_64_and_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1317  	do { \
  1318  		x86_64_alu_reg_memindex_size((inst), 4, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1319  	} while(0)
  1320  
  1321  #define x86_64_and_reg_imm_size(inst, dreg, imm, size) \
  1322  	do { \
  1323  		x86_64_alu_reg_imm_size((inst), 4, (dreg), (imm), (size)); \
  1324  	} while(0)
  1325  
  1326  #define x86_64_and_regp_imm_size(inst, reg, imm, size) \
  1327  	do { \
  1328  		x86_64_alu_regp_imm_size((inst), 4, (reg), (imm), (size)); \
  1329  	} while(0)
  1330  
  1331  #define x86_64_and_mem_imm_size(inst, mem, imm, size) \
  1332  	do { \
  1333  		x86_64_alu_mem_imm_size(inst, 4, mem, imm, size); \
  1334  	} while(0)
  1335  
  1336  #define x86_64_and_membase_imm_size(inst, basereg, disp, imm, size) \
  1337  	do { \
  1338  		x86_64_alu_membase_imm_size((inst), 4, (basereg), (disp), (imm), (size)); \
  1339  	} while(0)
  1340  
  1341  #define x86_64_and_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1342  	do { \
  1343  		x86_64_alu_memindex_imm_size((inst), 4, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1344  	} while(0)
  1345  
  1346  /*
   1347   * CMP: Compare
  1348   */
  1349  #define x86_64_cmp_reg_reg_size(inst, dreg, sreg, size) \
  1350  	do { \
  1351  		x86_64_alu_reg_reg_size((inst), 7, (dreg), (sreg), (size)); \
  1352  	} while(0)
  1353  
  1354  #define x86_64_cmp_regp_reg_size(inst, dregp, sreg, size) \
  1355  	do { \
  1356  		x86_64_alu_regp_reg_size((inst), 7, (dregp), (sreg), (size)); \
  1357  	} while(0)
  1358  
  1359  #define x86_64_cmp_mem_reg_size(inst, mem, sreg, size) \
  1360  	do { \
  1361  		x86_64_alu_mem_reg_size((inst), 7, (mem), (sreg), (size)); \
  1362  	} while(0)
  1363  
  1364  #define x86_64_cmp_membase_reg_size(inst, basereg, disp, sreg, size) \
  1365  	do { \
  1366  		x86_64_alu_membase_reg_size((inst), 7, (basereg), (disp), (sreg), (size)); \
  1367  	} while(0)
  1368  
  1369  #define x86_64_cmp_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1370  	do { \
  1371  		x86_64_alu_memindex_reg_size((inst), 7, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1372  	} while(0)
  1373  
  1374  #define x86_64_cmp_reg_regp_size(inst, dreg, sregp, size) \
  1375  	do { \
  1376  		x86_64_alu_reg_regp_size((inst), 7, (dreg), (sregp), (size)); \
  1377  	} while(0)
  1378  
  1379  #define x86_64_cmp_reg_mem_size(inst, dreg, mem, size) \
  1380  	do { \
  1381  		x86_64_alu_reg_mem_size((inst), 7, (dreg), (mem), (size)); \
  1382  	} while(0)
  1383  
  1384  #define x86_64_cmp_reg_membase_size(inst, dreg, basereg, disp, size) \
  1385  	do { \
  1386  		x86_64_alu_reg_membase_size((inst), 7, (dreg), (basereg), (disp), (size)); \
  1387  	} while(0)
  1388  
  1389  #define x86_64_cmp_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1390  	do { \
  1391  		x86_64_alu_reg_memindex_size((inst), 7, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1392  	} while(0)
  1393  
  1394  #define x86_64_cmp_reg_imm_size(inst, dreg, imm, size) \
  1395  	do { \
  1396  		x86_64_alu_reg_imm_size((inst), 7, (dreg), (imm), (size)); \
  1397  	} while(0)
  1398  
  1399  #define x86_64_cmp_regp_imm_size(inst, reg, imm, size) \
  1400  	do { \
  1401  		x86_64_alu_regp_imm_size((inst), 7, (reg), (imm), (size)); \
  1402  	} while(0)
  1403  
  1404  #define x86_64_cmp_mem_imm_size(inst, mem, imm, size) \
  1405  	do { \
  1406  		x86_64_alu_mem_imm_size(inst, 7, mem, imm, size); \
  1407  	} while(0)
  1408  
  1409  #define x86_64_cmp_membase_imm_size(inst, basereg, disp, imm, size) \
  1410  	do { \
  1411  		x86_64_alu_membase_imm_size((inst), 7, (basereg), (disp), (imm), (size)); \
  1412  	} while(0)
  1413  
  1414  #define x86_64_cmp_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1415  	do { \
  1416  		x86_64_alu_memindex_imm_size((inst), 7, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1417  	} while(0)
  1418  
  1419  /*
  1420   * OR
  1421   */
  1422  #define x86_64_or_reg_reg_size(inst, dreg, sreg, size) \
  1423  	do { \
  1424  		x86_64_alu_reg_reg_size((inst), 1, (dreg), (sreg), (size)); \
  1425  	} while(0)
  1426  
  1427  #define x86_64_or_regp_reg_size(inst, dregp, sreg, size) \
  1428  	do { \
  1429  		x86_64_alu_regp_reg_size((inst), 1, (dregp), (sreg), (size)); \
  1430  	} while(0)
  1431  
  1432  #define x86_64_or_mem_reg_size(inst, mem, sreg, size) \
  1433  	do { \
  1434  		x86_64_alu_mem_reg_size((inst), 1, (mem), (sreg), (size)); \
  1435  	} while(0)
  1436  
  1437  #define x86_64_or_membase_reg_size(inst, basereg, disp, sreg, size) \
  1438  	do { \
  1439  		x86_64_alu_membase_reg_size((inst), 1, (basereg), (disp), (sreg), (size)); \
  1440  	} while(0)
  1441  
  1442  #define x86_64_or_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1443  	do { \
  1444  		x86_64_alu_memindex_reg_size((inst), 1, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1445  	} while(0)
  1446  
  1447  #define x86_64_or_reg_regp_size(inst, dreg, sregp, size) \
  1448  	do { \
  1449  		x86_64_alu_reg_regp_size((inst), 1, (dreg), (sregp), (size)); \
  1450  	} while(0)
  1451  
  1452  #define x86_64_or_reg_mem_size(inst, dreg, mem, size) \
  1453  	do { \
  1454  		x86_64_alu_reg_mem_size((inst), 1, (dreg), (mem), (size)); \
  1455  	} while(0)
  1456  
  1457  #define x86_64_or_reg_membase_size(inst, dreg, basereg, disp, size) \
  1458  	do { \
  1459  		x86_64_alu_reg_membase_size((inst), 1, (dreg), (basereg), (disp), (size)); \
  1460  	} while(0)
  1461  
  1462  #define x86_64_or_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1463  	do { \
  1464  		x86_64_alu_reg_memindex_size((inst), 1, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1465  	} while(0)
  1466  
  1467  #define x86_64_or_reg_imm_size(inst, dreg, imm, size) \
  1468  	do { \
  1469  		x86_64_alu_reg_imm_size((inst), 1, (dreg), (imm), (size)); \
  1470  	} while(0)
  1471  
  1472  #define x86_64_or_regp_imm_size(inst, reg, imm, size) \
  1473  	do { \
  1474  		x86_64_alu_regp_imm_size((inst), 1, (reg), (imm), (size)); \
  1475  	} while(0)
  1476  
  1477  #define x86_64_or_mem_imm_size(inst, mem, imm, size) \
  1478  	do { \
  1479  		x86_64_alu_mem_imm_size(inst, 1, mem, imm, size); \
  1480  	} while(0)
  1481  
  1482  #define x86_64_or_membase_imm_size(inst, basereg, disp, imm, size) \
  1483  	do { \
  1484  		x86_64_alu_membase_imm_size((inst), 1, (basereg), (disp), (imm), (size)); \
  1485  	} while(0)
  1486  
  1487  #define x86_64_or_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1488  	do { \
  1489  		x86_64_alu_memindex_imm_size((inst), 1, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1490  	} while(0)
  1491  
  1492  /*
   1493   * SBB: Subtract with borrow
  1494   */
  1495  #define x86_64_sbb_reg_reg_size(inst, dreg, sreg, size) \
  1496  	do { \
  1497  		x86_64_alu_reg_reg_size((inst), 3, (dreg), (sreg), (size)); \
  1498  	} while(0)
  1499  
  1500  #define x86_64_sbb_regp_reg_size(inst, dregp, sreg, size) \
  1501  	do { \
  1502  		x86_64_alu_regp_reg_size((inst), 3, (dregp), (sreg), (size)); \
  1503  	} while(0)
  1504  
  1505  #define x86_64_sbb_mem_reg_size(inst, mem, sreg, size) \
  1506  	do { \
  1507  		x86_64_alu_mem_reg_size((inst), 3, (mem), (sreg), (size)); \
  1508  	} while(0)
  1509  
  1510  #define x86_64_sbb_membase_reg_size(inst, basereg, disp, sreg, size) \
  1511  	do { \
  1512  		x86_64_alu_membase_reg_size((inst), 3, (basereg), (disp), (sreg), (size)); \
  1513  	} while(0)
  1514  
  1515  #define x86_64_sbb_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1516  	do { \
  1517  		x86_64_alu_memindex_reg_size((inst), 3, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1518  	} while(0)
  1519  
  1520  #define x86_64_sbb_reg_regp_size(inst, dreg, sregp, size) \
  1521  	do { \
  1522  		x86_64_alu_reg_regp_size((inst), 3, (dreg), (sregp), (size)); \
  1523  	} while(0)
  1524  
  1525  #define x86_64_sbb_reg_mem_size(inst, dreg, mem, size) \
  1526  	do { \
  1527  		x86_64_alu_reg_mem_size((inst), 3, (dreg), (mem), (size)); \
  1528  	} while(0)
  1529  
  1530  #define x86_64_sbb_reg_membase_size(inst, dreg, basereg, disp, size) \
  1531  	do { \
  1532  		x86_64_alu_reg_membase_size((inst), 3, (dreg), (basereg), (disp), (size)); \
  1533  	} while(0)
  1534  
  1535  #define x86_64_sbb_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1536  	do { \
  1537  		x86_64_alu_reg_memindex_size((inst), 3, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1538  	} while(0)
  1539  
  1540  #define x86_64_sbb_reg_imm_size(inst, dreg, imm, size) \
  1541  	do { \
  1542  		x86_64_alu_reg_imm_size((inst), 3, (dreg), (imm), (size)); \
  1543  	} while(0)
  1544  
  1545  #define x86_64_sbb_regp_imm_size(inst, reg, imm, size) \
  1546  	do { \
  1547  		x86_64_alu_regp_imm_size((inst), 3, (reg), (imm), (size)); \
  1548  	} while(0)
  1549  
  1550  #define x86_64_sbb_mem_imm_size(inst, mem, imm, size) \
  1551  	do { \
  1552  		x86_64_alu_mem_imm_size(inst, 3, mem, imm, size); \
  1553  	} while(0)
  1554  
  1555  #define x86_64_sbb_membase_imm_size(inst, basereg, disp, imm, size) \
  1556  	do { \
  1557  		x86_64_alu_membase_imm_size((inst), 3, (basereg), (disp), (imm), (size)); \
  1558  	} while(0)
  1559  
  1560  #define x86_64_sbb_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1561  	do { \
  1562  		x86_64_alu_memindex_imm_size((inst), 3, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1563  	} while(0)
  1564  
  1565  /*
  1566   * SUB: Subtract
  1567   */
  1568  #define x86_64_sub_reg_reg_size(inst, dreg, sreg, size) \
  1569  	do { \
  1570  		x86_64_alu_reg_reg_size((inst), 5, (dreg), (sreg), (size)); \
  1571  	} while(0)
  1572  
  1573  #define x86_64_sub_regp_reg_size(inst, dregp, sreg, size) \
  1574  	do { \
  1575  		x86_64_alu_regp_reg_size((inst), 5, (dregp), (sreg), (size)); \
  1576  	} while(0)
  1577  
  1578  #define x86_64_sub_mem_reg_size(inst, mem, sreg, size) \
  1579  	do { \
  1580  		x86_64_alu_mem_reg_size((inst), 5, (mem), (sreg), (size)); \
  1581  	} while(0)
  1582  
  1583  #define x86_64_sub_membase_reg_size(inst, basereg, disp, sreg, size) \
  1584  	do { \
  1585  		x86_64_alu_membase_reg_size((inst), 5, (basereg), (disp), (sreg), (size)); \
  1586  	} while(0)
  1587  
  1588  #define x86_64_sub_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1589  	do { \
  1590  		x86_64_alu_memindex_reg_size((inst), 5, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1591  	} while(0)
  1592  
  1593  #define x86_64_sub_reg_regp_size(inst, dreg, sregp, size) \
  1594  	do { \
  1595  		x86_64_alu_reg_regp_size((inst), 5, (dreg), (sregp), (size)); \
  1596  	} while(0)
  1597  
  1598  #define x86_64_sub_reg_mem_size(inst, dreg, mem, size) \
  1599  	do { \
  1600  		x86_64_alu_reg_mem_size((inst), 5, (dreg), (mem), (size)); \
  1601  	} while(0)
  1602  
  1603  #define x86_64_sub_reg_membase_size(inst, dreg, basereg, disp, size) \
  1604  	do { \
  1605  		x86_64_alu_reg_membase_size((inst), 5, (dreg), (basereg), (disp), (size)); \
  1606  	} while(0)
  1607  
  1608  #define x86_64_sub_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1609  	do { \
  1610  		x86_64_alu_reg_memindex_size((inst), 5, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1611  	} while(0)
  1612  
  1613  #define x86_64_sub_reg_imm_size(inst, dreg, imm, size) \
  1614  	do { \
  1615  		x86_64_alu_reg_imm_size((inst), 5, (dreg), (imm), (size)); \
  1616  	} while(0)
  1617  
  1618  #define x86_64_sub_regp_imm_size(inst, reg, imm, size) \
  1619  	do { \
  1620  		x86_64_alu_regp_imm_size((inst), 5, (reg), (imm), (size)); \
  1621  	} while(0)
  1622  
  1623  #define x86_64_sub_mem_imm_size(inst, mem, imm, size) \
  1624  	do { \
  1625  		x86_64_alu_mem_imm_size(inst, 5, mem, imm, size); \
  1626  	} while(0)
  1627  
  1628  #define x86_64_sub_membase_imm_size(inst, basereg, disp, imm, size) \
  1629  	do { \
  1630  		x86_64_alu_membase_imm_size((inst), 5, (basereg), (disp), (imm), (size)); \
  1631  	} while(0)
  1632  
  1633  #define x86_64_sub_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1634  	do { \
  1635  		x86_64_alu_memindex_imm_size((inst), 5, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1636  	} while(0)
  1637  
  1638  /*
  1639   * XOR
  1640   */
  1641  #define x86_64_xor_reg_reg_size(inst, dreg, sreg, size) \
  1642  	do { \
  1643  		x86_64_alu_reg_reg_size((inst), 6, (dreg), (sreg), (size)); \
  1644  	} while(0)
  1645  
  1646  #define x86_64_xor_regp_reg_size(inst, dregp, sreg, size) \
  1647  	do { \
  1648  		x86_64_alu_regp_reg_size((inst), 6, (dregp), (sreg), (size)); \
  1649  	} while(0)
  1650  
  1651  #define x86_64_xor_mem_reg_size(inst, mem, sreg, size) \
  1652  	do { \
  1653  		x86_64_alu_mem_reg_size((inst), 6, (mem), (sreg), (size)); \
  1654  	} while(0)
  1655  
  1656  #define x86_64_xor_membase_reg_size(inst, basereg, disp, sreg, size) \
  1657  	do { \
  1658  		x86_64_alu_membase_reg_size((inst), 6, (basereg), (disp), (sreg), (size)); \
  1659  	} while(0)
  1660  
  1661  #define x86_64_xor_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  1662  	do { \
  1663  		x86_64_alu_memindex_reg_size((inst), 6, (basereg), (disp), (indexreg), (shift), (sreg), (size)); \
  1664  	} while(0)
  1665  
  1666  #define x86_64_xor_reg_regp_size(inst, dreg, sregp, size) \
  1667  	do { \
  1668  		x86_64_alu_reg_regp_size((inst), 6, (dreg), (sregp), (size)); \
  1669  	} while(0)
  1670  
  1671  #define x86_64_xor_reg_mem_size(inst, dreg, mem, size) \
  1672  	do { \
  1673  		x86_64_alu_reg_mem_size((inst), 6, (dreg), (mem), (size)); \
  1674  	} while(0)
  1675  
  1676  #define x86_64_xor_reg_membase_size(inst, dreg, basereg, disp, size) \
  1677  	do { \
  1678  		x86_64_alu_reg_membase_size((inst), 6, (dreg), (basereg), (disp), (size)); \
  1679  	} while(0)
  1680  
  1681  #define x86_64_xor_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  1682  	do { \
  1683  		x86_64_alu_reg_memindex_size((inst), 6, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  1684  	} while(0)
  1685  
  1686  #define x86_64_xor_reg_imm_size(inst, dreg, imm, size) \
  1687  	do { \
  1688  		x86_64_alu_reg_imm_size((inst), 6, (dreg), (imm), (size)); \
  1689  	} while(0)
  1690  
  1691  #define x86_64_xor_regp_imm_size(inst, reg, imm, size) \
  1692  	do { \
  1693  		x86_64_alu_regp_imm_size((inst), 6, (reg), (imm), (size)); \
  1694  	} while(0)
  1695  
  1696  #define x86_64_xor_mem_imm_size(inst, mem, imm, size) \
  1697  	do { \
  1698  		x86_64_alu_mem_imm_size(inst, 6, mem, imm, size); \
  1699  	} while(0)
  1700  
  1701  #define x86_64_xor_membase_imm_size(inst, basereg, disp, imm, size) \
  1702  	do { \
  1703  		x86_64_alu_membase_imm_size((inst), 6, (basereg), (disp), (imm), (size)); \
  1704  	} while(0)
  1705  
  1706  #define x86_64_xor_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  1707  	do { \
  1708  		x86_64_alu_memindex_imm_size((inst), 6, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  1709  	} while(0)
  1710  
  1711  /*
  1712   * dec
  1713   */
  1714  #define x86_64_dec_reg_size(inst, reg, size) \
  1715  	do { \
  1716  		x86_64_alu1_reg_size((inst), 0xfe, 1, (reg), (size)); \
  1717  	} while(0)
  1718  
  1719  #define x86_64_dec_regp_size(inst, regp, size) \
  1720  	do { \
  1721  		x86_64_alu1_regp_size((inst), 0xfe, 1, (regp), (size)); \
  1722  	} while(0)
  1723  
  1724  #define x86_64_dec_mem_size(inst, mem, size) \
  1725  	do { \
  1726  		x86_64_alu1_mem_size((inst), 0xfe, 1, (mem), (size)); \
  1727  	} while(0)
  1728  
  1729  #define x86_64_dec_membase_size(inst, basereg, disp, size) \
  1730  	do { \
  1731  		x86_64_alu1_membase_size((inst), 0xfe, 1, (basereg), (disp), (size)); \
  1732  	} while(0)
  1733  
  1734  #define x86_64_dec_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  1735  	do { \
  1736  		x86_64_alu1_memindex_size((inst), 0xfe, 1, (basereg), (disp), (indexreg), (shift), (size)); \
  1737  	} while(0)
  1738  
  1739  /*
  1740   * div: unsigned division RDX:RAX / operand
  1741   */
  1742  #define x86_64_div_reg_size(inst, reg, size) \
  1743  	do { \
  1744  		x86_64_alu1_reg_size((inst), 0xf6, 6, (reg), (size)); \
  1745  	} while(0)
  1746  
  1747  #define x86_64_div_regp_size(inst, regp, size) \
  1748  	do { \
  1749  		x86_64_alu1_regp_size((inst), 0xf6, 6, (regp), (size)); \
  1750  	} while(0)
  1751  
  1752  #define x86_64_div_mem_size(inst, mem, size) \
  1753  	do { \
  1754  		x86_64_alu1_mem_size((inst), 0xf6, 6, (mem), (size)); \
  1755  	} while(0)
  1756  
  1757  #define x86_64_div_membase_size(inst, basereg, disp, size) \
  1758  	do { \
  1759  		x86_64_alu1_membase_size((inst), 0xf6, 6, (basereg), (disp), (size)); \
  1760  	} while(0)
  1761  
  1762  #define x86_64_div_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  1763  	do { \
  1764  		x86_64_alu1_memindex_size((inst), 0xf6, 6, (basereg), (disp), (indexreg), (shift), (size)); \
  1765  	} while(0)
  1766  
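        /*
         * A minimal usage sketch of the div macros: the 128 bit dividend
         * lives in RDX:RAX, so RDX has to be zeroed first for a plain
         * unsigned 64 bit divide:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_clear_reg(inst, X86_64_RDX);		// xor edx, edx
         *	x86_64_div_reg_size(inst, X86_64_RCX, 8);	// div rcx
         *	// the quotient is left in RAX, the remainder in RDX
         */
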
  1767  /*
  1768   * idiv: signed division RDX:RAX / operand
  1769   */
  1770  #define x86_64_idiv_reg_size(inst, reg, size) \
  1771  	do { \
  1772  		x86_64_alu1_reg_size((inst), 0xf6, 7, (reg), (size)); \
  1773  	} while(0)
  1774  
  1775  #define x86_64_idiv_regp_size(inst, regp, size) \
  1776  	do { \
  1777  		x86_64_alu1_regp_size((inst), 0xf6, 7, (regp), (size)); \
  1778  	} while(0)
  1779  
  1780  #define x86_64_idiv_mem_size(inst, mem, size) \
  1781  	do { \
  1782  		x86_64_alu1_mem_size((inst), 0xf6, 7, (mem), (size)); \
  1783  	} while(0)
  1784  
  1785  #define x86_64_idiv_membase_size(inst, basereg, disp, size) \
  1786  	do { \
  1787  		x86_64_alu1_membase_size((inst), 0xf6, 7, (basereg), (disp), (size)); \
  1788  	} while(0)
  1789  
  1790  #define x86_64_idiv_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  1791  	do { \
  1792  		x86_64_alu1_memindex_size((inst), 0xf6, 7, (basereg), (disp), (indexreg), (shift), (size)); \
  1793  	} while(0)
  1794  
  1795  /*
  1796   * inc
  1797   */
  1798  #define x86_64_inc_reg_size(inst, reg, size) \
  1799  	do { \
  1800  		x86_64_alu1_reg_size((inst), 0xfe, 0, (reg), (size)); \
  1801  	} while(0)
  1802  
  1803  #define x86_64_inc_regp_size(inst, regp, size) \
  1804  	do { \
  1805  		x86_64_alu1_regp_size((inst), 0xfe, 0, (regp), (size)); \
  1806  	} while(0)
  1807  
  1808  #define x86_64_inc_mem_size(inst, mem, size) \
  1809  	do { \
  1810  		x86_64_alu1_mem_size((inst), 0xfe, 0, (mem), (size)); \
  1811  	} while(0)
  1812  
  1813  #define x86_64_inc_membase_size(inst, basereg, disp, size) \
  1814  	do { \
  1815  		x86_64_alu1_membase_size((inst), 0xfe, 0, (basereg), (disp), (size)); \
  1816  	} while(0)
  1817  
  1818  #define x86_64_inc_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  1819  	do { \
  1820  		x86_64_alu1_memindex_size((inst), 0xfe, 0, (basereg), (disp), (indexreg), (shift), (size)); \
  1821  	} while(0)
  1822  
  1823  /*
  1824   * mul: multiply RDX:RAX = RAX * operand
  1825   * is_signed == 0 selects unsigned multiplication (mul),
  1826   * otherwise signed multiplication (imul) is used.
  1827   */
  1828  #define x86_64_mul_reg_issigned_size(inst, reg, is_signed, size) \
  1829  	do { \
  1830  		x86_64_alu1_reg_size((inst), 0xf6, ((is_signed) ? 5 : 4), (reg), (size)); \
  1831  	} while(0)
  1832  
  1833  #define x86_64_mul_regp_issigned_size(inst, regp, is_signed, size) \
  1834  	do { \
  1835  		x86_64_alu1_regp_size((inst), 0xf6, ((is_signed) ? 5 : 4), (regp), (size)); \
  1836  	} while(0)
  1837  
  1838  #define x86_64_mul_mem_issigned_size(inst, mem, is_signed, size) \
  1839  	do { \
  1840  		x86_64_alu1_mem_size((inst), 0xf6, ((is_signed) ? 5 : 4), (mem), (size)); \
  1841  	} while(0)
  1842  
  1843  #define x86_64_mul_membase_issigned_size(inst, basereg, disp, is_signed, size) \
  1844  	do { \
  1845  		x86_64_alu1_membase_size((inst), 0xf6, ((is_signed) ? 5 : 4), (basereg), (disp), (size)); \
  1846  	} while(0)
  1847  
  1848  #define x86_64_mul_memindex_issigned_size(inst, basereg, disp, indexreg, shift, is_signed, size) \
  1849  	do { \
  1850  		x86_64_alu1_memindex_size((inst), 0xf6, ((is_signed) ? 5 : 4), (basereg), (disp), (indexreg), (shift), (size)); \
  1851  	} while(0)
  1852  
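        /*
         * A minimal usage sketch: a widening multiply of RAX by RCX that
         * leaves the 128 bit product in RDX:RAX:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_mul_reg_issigned_size(inst, X86_64_RCX, 0, 8);	// mul rcx (unsigned)
         *	x86_64_mul_reg_issigned_size(inst, X86_64_RCX, 1, 8);	// imul rcx (signed)
         */
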
  1853  /*
  1854   * neg
  1855   */
  1856  #define x86_64_neg_reg_size(inst, reg, size) \
  1857  	do { \
  1858  		x86_64_alu1_reg_size((inst), 0xf6, 3, (reg), (size)); \
  1859  	} while(0)
  1860  
  1861  #define x86_64_neg_regp_size(inst, regp, size) \
  1862  	do { \
  1863  		x86_64_alu1_regp_size((inst), 0xf6, 3, (regp), (size)); \
  1864  	} while(0)
  1865  
  1866  #define x86_64_neg_mem_size(inst, mem, size) \
  1867  	do { \
  1868  		x86_64_alu1_mem_size((inst), 0xf6, 3, (mem), (size)); \
  1869  	} while(0)
  1870  
  1871  #define x86_64_neg_membase_size(inst, basereg, disp, size) \
  1872  	do { \
  1873  		x86_64_alu1_membase_size((inst), 0xf6, 3, (basereg), (disp), (size)); \
  1874  	} while(0)
  1875  
  1876  #define x86_64_neg_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  1877  	do { \
  1878  		x86_64_alu1_memindex_size((inst), 0xf6, 3, (basereg), (disp), (indexreg), (shift), (size)); \
  1879  	} while(0)
  1880  
  1881  /*
  1882   * not
  1883   */
  1884  #define x86_64_not_reg_size(inst, reg, size) \
  1885  	do { \
  1886  		x86_64_alu1_reg_size((inst), 0xf6, 2, (reg), (size)); \
  1887  	} while(0)
  1888  
  1889  #define x86_64_not_regp_size(inst, regp, size) \
  1890  	do { \
  1891  		x86_64_alu1_regp_size((inst), 0xf6, 2, (regp), (size)); \
  1892  	} while(0)
  1893  
  1894  #define x86_64_not_mem_size(inst, mem, size) \
  1895  	do { \
  1896  		x86_64_alu1_mem_size((inst), 0xf6, 2, (mem), (size)); \
  1897  	} while(0)
  1898  
  1899  #define x86_64_not_membase_size(inst, basereg, disp, size) \
  1900  	do { \
  1901  		x86_64_alu1_membase_size((inst), 0xf6, 2, (basereg), (disp), (size)); \
  1902  	} while(0)
  1903  
  1904  #define x86_64_not_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  1905  	do { \
  1906  		x86_64_alu1_memindex_size((inst), 0xf6, 2, (basereg), (disp), (indexreg), (shift), (size)); \
  1907  	} while(0)
  1908  
  1909  /*
  1910   * Note: x86_64_clear_reg() changes the condition codes!
  1911   */
  1912  #define x86_64_clear_reg(inst, reg) \
  1913  	x86_64_xor_reg_reg_size((inst), (reg), (reg), 4)
  1914  
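        /*
         * A minimal usage sketch: the clear is emitted as a 32 bit
         * xor reg, reg (writing the low half implicitly zeroes the upper
         * half), so it must not be placed between a compare and the
         * conditional branch that consumes the flags:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_clear_reg(inst, X86_64_RAX);	// xor eax, eax; clobbers the flags
         */
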
  1915  /*
  1916   * shift instructions
  1917   */
  1918  #define x86_64_shift_reg_imm_size(inst, opc, dreg, imm, size) \
  1919  	do { \
  1920  		if((imm) == 1) \
  1921  		{ \
  1922  			if((size) == 2) \
  1923  			{ \
  1924  				*(inst)++ = (unsigned char)0x66; \
  1925  			} \
  1926  			x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \
  1927  			x86_64_opcode1_emit((inst), 0xd0, (size)); \
  1928  			x86_64_reg_emit((inst), (opc), (dreg)); \
  1929  		} \
  1930  		else \
  1931  		{ \
  1932  			if((size) == 2) \
  1933  			{ \
  1934  				*(inst)++ = (unsigned char)0x66; \
  1935  			} \
  1936  			x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \
  1937  			x86_64_opcode1_emit((inst), 0xc0, (size)); \
  1938  			x86_64_reg_emit((inst), (opc), (dreg)); \
  1939  			x86_imm_emit8((inst), (imm)); \
  1940  		} \
  1941  	} while(0)
  1942  
  1943  #define x86_64_shift_mem_imm_size(inst, opc, mem, imm, size) \
  1944  	do { \
  1945  		if((imm) == 1) \
  1946  		{ \
  1947  			if((size) == 2) \
  1948  			{ \
  1949  				*(inst)++ = (unsigned char)0x66; \
  1950  			} \
  1951  			x86_64_rex_emit((inst), (size), 0, 0, 0); \
  1952  			x86_64_opcode1_emit((inst), 0xd0, (size)); \
  1953  			x86_64_mem_emit((inst), (opc), (mem)); \
  1954  		} \
  1955  		else \
  1956  		{ \
  1957  			if((size) == 2) \
  1958  			{ \
  1959  				*(inst)++ = (unsigned char)0x66; \
  1960  			} \
  1961  			x86_64_rex_emit((inst), (size), 0, 0, 0); \
  1962  			x86_64_opcode1_emit((inst), 0xc0, (size)); \
  1963  			x86_64_mem_emit((inst), (opc), (mem)); \
  1964  			x86_imm_emit8((inst), (imm)); \
  1965  		} \
  1966  	} while(0)
  1967  
  1968  #define x86_64_shift_regp_imm_size(inst, opc, dregp, imm, size) \
  1969  	do { \
  1970  		if((imm) == 1) \
  1971  		{ \
  1972  			if((size) == 2) \
  1973  			{ \
  1974  				*(inst)++ = (unsigned char)0x66; \
  1975  			} \
  1976  			x86_64_rex_emit((inst), (size), 0, 0, (dregp)); \
  1977  			x86_64_opcode1_emit((inst), 0xd0, (size)); \
  1978  			x86_64_regp_emit((inst), (opc), (dregp)); \
  1979  		} \
  1980  		else \
  1981  		{ \
  1982  			if((size) == 2) \
  1983  			{ \
  1984  				*(inst)++ = (unsigned char)0x66; \
  1985  			} \
  1986  			x86_64_rex_emit((inst), (size), 0, 0, (dregp)); \
  1987  			x86_64_opcode1_emit((inst), 0xc0, (size)); \
  1988  			x86_64_regp_emit((inst), (opc), (dregp)); \
  1989  			x86_imm_emit8((inst), (imm)); \
  1990  		} \
  1991  	} while(0)
  1992  
  1993  #define x86_64_shift_membase_imm_size(inst, opc, basereg, disp, imm, size) \
  1994  	do { \
  1995  		if((imm) == 1) \
  1996  		{ \
  1997  			if((size) == 2) \
  1998  			{ \
  1999  				*(inst)++ = (unsigned char)0x66; \
  2000  			} \
  2001  			x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
  2002  			x86_64_opcode1_emit((inst), 0xd0, (size)); \
  2003  			x86_64_membase_emit((inst), (opc), (basereg), (disp)); \
  2004  		} \
  2005  		else \
  2006  		{ \
  2007  			if((size) == 2) \
  2008  			{ \
  2009  				*(inst)++ = (unsigned char)0x66; \
  2010  			} \
  2011  			x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
  2012  			x86_64_opcode1_emit((inst), 0xc0, (size)); \
  2013  			x86_64_membase_emit((inst), (opc), (basereg), (disp)); \
  2014  			x86_imm_emit8((inst), (imm)); \
  2015  		} \
  2016  	} while(0)
  2017  
  2018  #define x86_64_shift_memindex_imm_size(inst, opc, basereg, disp, indexreg, shift, imm, size) \
  2019  	do { \
  2020  		if((imm) == 1) \
  2021  		{ \
  2022  			if((size) == 2) \
  2023  			{ \
  2024  				*(inst)++ = (unsigned char)0x66; \
  2025  			} \
  2026  			x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
  2027  			x86_64_opcode1_emit((inst), 0xd0, (size)); \
  2028  			x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
  2029  		} \
  2030  		else \
  2031  		{ \
  2032  			if((size) == 2) \
  2033  			{ \
  2034  				*(inst)++ = (unsigned char)0x66; \
  2035  			} \
  2036  			x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
  2037  			x86_64_opcode1_emit((inst), 0xc0, (size)); \
  2038  			x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
  2039  			x86_imm_emit8((inst), (imm)); \
  2040  		} \
  2041  	} while(0)
  2042  
  2043  /*
  2044   * shift by the number of bits in %cl
  2045   */
  2046  #define x86_64_shift_reg_size(inst, opc, dreg, size) \
  2047  	do { \
  2048  		if((size) == 2) \
  2049  		{ \
  2050  			*(inst)++ = (unsigned char)0x66; \
  2051  		} \
  2052  		x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \
  2053  		x86_64_opcode1_emit((inst), 0xd2, (size)); \
  2054  		x86_64_reg_emit((inst), (opc), (dreg)); \
  2055  	} while(0)
  2056  
  2057  #define x86_64_shift_mem_size(inst, opc, mem, size) \
  2058  	do { \
  2059  		if((size) == 2) \
  2060  		{ \
  2061  			*(inst)++ = (unsigned char)0x66; \
  2062  		} \
  2063  		x86_64_rex_emit((inst), (size), 0, 0, 0); \
  2064  		x86_64_opcode1_emit((inst), 0xd2, (size)); \
  2065  		x86_64_mem_emit((inst), (opc), (mem)); \
  2066  	} while(0)
  2067  
  2068  #define x86_64_shift_regp_size(inst, opc, dregp, size) \
  2069  	do { \
  2070  		if((size) == 2) \
  2071  		{ \
  2072  			*(inst)++ = (unsigned char)0x66; \
  2073  		} \
  2074  		x86_64_rex_emit((inst), (size), 0, 0, (dregp)); \
  2075  		x86_64_opcode1_emit((inst), 0xd2, (size)); \
  2076  		x86_64_regp_emit((inst), (opc), (dregp)); \
  2077  	} while(0)
  2078  
  2079  #define x86_64_shift_membase_size(inst, opc, basereg, disp, size) \
  2080  	do { \
  2081  		if((size) == 2) \
  2082  		{ \
  2083  			*(inst)++ = (unsigned char)0x66; \
  2084  		} \
  2085  		x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
  2086  		x86_64_opcode1_emit((inst), 0xd2, (size)); \
  2087  		x86_64_membase_emit((inst), (opc), (basereg), (disp)); \
  2088  	} while(0)
  2089  
  2090  #define x86_64_shift_memindex_size(inst, opc, basereg, disp, indexreg, shift, size) \
  2091  	do { \
  2092  		if((size) == 2) \
  2093  		{ \
  2094  			*(inst)++ = (unsigned char)0x66; \
  2095  		} \
  2096  		x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
  2097  		x86_64_opcode1_emit((inst), 0xd2, (size)); \
  2098  		x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \
  2099  	} while(0)
  2100  
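        /*
         * A minimal usage sketch: the variable shifts take their count from
         * %cl, so the count has to be placed in RCX first (opc 4 is shl, see
         * the named wrappers below):
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_mov_reg_imm_size(inst, X86_64_RCX, 5, 4);	// count -> %cl
         *	x86_64_shift_reg_size(inst, 4, X86_64_RAX, 8);		// shl rax, cl
         */
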
  2101  /*
  2102   * shl: Shift left (zeros are shifted into the least significant bit)
  2103   */
  2104  #define x86_64_shl_reg_imm_size(inst, dreg, imm, size) \
  2105  	do { \
  2106  		x86_64_shift_reg_imm_size((inst), 4, (dreg), (imm), (size)); \
  2107  	} while(0)
  2108  
  2109  #define x86_64_shl_mem_imm_size(inst, mem, imm, size) \
  2110  	do { \
  2111  		x86_64_shift_mem_imm_size((inst), 4, (mem), (imm), (size)); \
  2112  	} while(0)
  2113  
  2114  #define x86_64_shl_regp_imm_size(inst, dregp, imm, size) \
  2115  	do { \
  2116  		x86_64_shift_regp_imm_size((inst), 4, (dregp), (imm), (size)); \
  2117  	} while(0)
  2118  
  2119  #define x86_64_shl_membase_imm_size(inst, basereg, disp, imm, size) \
  2120  	do { \
  2121  		x86_64_shift_membase_imm_size((inst), 4, (basereg), (disp), (imm), (size)); \
  2122  	} while(0)
  2123  
  2124  #define x86_64_shl_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  2125  	do { \
  2126  		x86_64_shift_memindex_imm_size((inst), 4, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  2127  	} while(0)
  2128  
  2129  #define x86_64_shl_reg_size(inst, dreg, size) \
  2130  	do { \
  2131  		x86_64_shift_reg_size((inst), 4, (dreg), (size)); \
  2132  	} while(0)
  2133  
  2134  #define x86_64_shl_mem_size(inst, mem, size) \
  2135  	do { \
  2136  		x86_64_shift_mem_size((inst), 4, (mem), (size)); \
  2137  	} while(0)
  2138  
  2139  #define x86_64_shl_regp_size(inst, dregp, size) \
  2140  	do { \
  2141  		x86_64_shift_regp_size((inst), 4, (dregp), (size)); \
  2142  	} while(0)
  2143  
  2144  #define x86_64_shl_membase_size(inst, basereg, disp, size) \
  2145  	do { \
  2146  		x86_64_shift_membase_size((inst), 4, (basereg), (disp), (size)); \
  2147  	} while(0)
  2148  
  2149  #define x86_64_shl_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  2150  	do { \
  2151  		x86_64_shift_memindex_size((inst), 4, (basereg), (disp), (indexreg), (shift), (size)); \
  2152  	} while(0)
  2153  
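        /*
         * A minimal usage sketch: the immediate forms pick the short
         * shift-by-one encoding (0xd0/0xd1) when imm == 1 and the imm8
         * form (0xc0/0xc1) otherwise:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_shl_reg_imm_size(inst, X86_64_RAX, 1, 8);	// shl rax, 1 (0xd1 form)
         *	x86_64_shl_reg_imm_size(inst, X86_64_RAX, 3, 8);	// shl rax, 3 (0xc1 form)
         */
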
  2154  /*
  2155   * shr: Unsigned shift right (zeros are shifted into the most significant bit)
  2156   */
  2157  #define x86_64_shr_reg_imm_size(inst, dreg, imm, size) \
  2158  	do { \
  2159  		x86_64_shift_reg_imm_size((inst), 5, (dreg), (imm), (size)); \
  2160  	} while(0)
  2161  
  2162  #define x86_64_shr_mem_imm_size(inst, mem, imm, size) \
  2163  	do { \
  2164  		x86_64_shift_mem_imm_size((inst), 5, (mem), (imm), (size)); \
  2165  	} while(0)
  2166  
  2167  #define x86_64_shr_regp_imm_size(inst, dregp, imm, size) \
  2168  	do { \
  2169  		x86_64_shift_regp_imm_size((inst), 5, (dregp), (imm), (size)); \
  2170  	} while(0)
  2171  
  2172  #define x86_64_shr_membase_imm_size(inst, basereg, disp, imm, size) \
  2173  	do { \
  2174  		x86_64_shift_membase_imm_size((inst), 5, (basereg), (disp), (imm), (size)); \
  2175  	} while(0)
  2176  
  2177  #define x86_64_shr_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  2178  	do { \
  2179  		x86_64_shift_memindex_imm_size((inst), 5, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  2180  	} while(0)
  2181  
  2182  #define x86_64_shr_reg_size(inst, dreg, size) \
  2183  	do { \
  2184  		x86_64_shift_reg_size((inst), 5, (dreg), (size)); \
  2185  	} while(0)
  2186  
  2187  #define x86_64_shr_mem_size(inst, mem, size) \
  2188  	do { \
  2189  		x86_64_shift_mem_size((inst), 5, (mem), (size)); \
  2190  	} while(0)
  2191  
  2192  #define x86_64_shr_regp_size(inst, dregp, size) \
  2193  	do { \
  2194  		x86_64_shift_regp_size((inst), 5, (dregp), (size)); \
  2195  	} while(0)
  2196  
  2197  #define x86_64_shr_membase_size(inst, basereg, disp, size) \
  2198  	do { \
  2199  		x86_64_shift_membase_size((inst), 5, (basereg), (disp), (size)); \
  2200  	} while(0)
  2201  
  2202  #define x86_64_shr_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  2203  	do { \
  2204  		x86_64_shift_memindex_size((inst), 5, (basereg), (disp), (indexreg), (shift), (size)); \
  2205  	} while(0)
  2206  
  2207  /*
  2208   * sar: Signed shift right (the most significant bit is replicated)
  2209   */
  2210  #define x86_64_sar_reg_imm_size(inst, dreg, imm, size) \
  2211  	do { \
  2212  		x86_64_shift_reg_imm_size((inst), 7, (dreg), (imm), (size)); \
  2213  	} while(0)
  2214  
  2215  #define x86_64_sar_mem_imm_size(inst, mem, imm, size) \
  2216  	do { \
  2217  		x86_64_shift_mem_imm_size((inst), 7, (mem), (imm), (size)); \
  2218  	} while(0)
  2219  
  2220  #define x86_64_sar_regp_imm_size(inst, dregp, imm, size) \
  2221  	do { \
  2222  		x86_64_shift_regp_imm_size((inst), 7, (dregp), (imm), (size)); \
  2223  	} while(0)
  2224  
  2225  #define x86_64_sar_membase_imm_size(inst, basereg, disp, imm, size) \
  2226  	do { \
  2227  		x86_64_shift_membase_imm_size((inst), 7, (basereg), (disp), (imm), (size)); \
  2228  	} while(0)
  2229  
  2230  #define x86_64_sar_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  2231  	do { \
  2232  		x86_64_shift_memindex_imm_size((inst), 7, (basereg), (disp), (indexreg), (shift), (imm), (size)); \
  2233  	} while(0)
  2234  
  2235  #define x86_64_sar_reg_size(inst, dreg, size) \
  2236  	do { \
  2237  		x86_64_shift_reg_size((inst), 7, (dreg), (size)); \
  2238  	} while(0)
  2239  
  2240  #define x86_64_sar_mem_size(inst, mem, size) \
  2241  	do { \
  2242  		x86_64_shift_mem_size((inst), 7, (mem), (size)); \
  2243  	} while(0)
  2244  
  2245  #define x86_64_sar_regp_size(inst, dregp, size) \
  2246  	do { \
  2247  		x86_64_shift_regp_size((inst), 7, (dregp), (size)); \
  2248  	} while(0)
  2249  
  2250  #define x86_64_sar_membase_size(inst, basereg, disp, size) \
  2251  	do { \
  2252  		x86_64_shift_membase_size((inst), 7, (basereg), (disp), (size)); \
  2253  	} while(0)
  2254  
  2255  #define x86_64_sar_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  2256  	do { \
  2257  		x86_64_shift_memindex_size((inst), 7, (basereg), (disp), (indexreg), (shift), (size)); \
  2258  	} while(0)
  2259  
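        /*
         * A minimal usage sketch: on a negative value shr and sar differ in
         * what they shift into the vacated top bit:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_shr_reg_imm_size(inst, X86_64_RAX, 1, 8);	// shr rax, 1: inserts a zero bit
         *	x86_64_sar_reg_imm_size(inst, X86_64_RAX, 1, 8);	// sar rax, 1: replicates the sign bit
         */
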
  2260  /*
  2261   * test: AND the values and set SF, ZF and PF according to the result
  2262   */
  2263  #define x86_64_test_reg_imm_size(inst, reg, imm, size) \
  2264  	do { \
  2265  		if((size) == 2) \
  2266  		{ \
  2267  			*(inst)++ = (unsigned char)0x66; \
  2268  		} \
  2269  		x86_64_rex_emit((inst), (size), 0, 0, (reg)); \
  2270  		if((reg) == X86_64_RAX) { \
  2271  			x86_64_opcode1_emit((inst), 0xa8, (size)); \
  2272  		} \
  2273  		else \
  2274  		{ \
  2275  			x86_64_opcode1_emit((inst), 0xf6, (size)); \
  2276  			x86_64_reg_emit((inst), 0, (reg)); \
  2277  		} \
  2278  		x86_64_imm_emit_max32((inst), (imm), (size)); \
  2279  	} while (0)
  2280  
  2281  #define x86_64_test_regp_imm_size(inst, regp, imm, size) \
  2282  	do { \
  2283  		if((size) == 2) \
  2284  		{ \
  2285  			*(inst)++ = (unsigned char)0x66; \
  2286  		} \
  2287  		x86_64_rex_emit((inst), (size), 0, 0, (regp)); \
  2288  		x86_64_opcode1_emit((inst), 0xf6, (size)); \
  2289  		x86_64_regp_emit((inst), 0, (regp)); \
  2290  		x86_64_imm_emit_max32((inst), (imm), (size)); \
  2291  	} while (0)
  2292  
  2293  #define x86_64_test_mem_imm_size(inst, mem, imm, size) \
  2294  	do { \
  2295  		if((size) == 2) \
  2296  		{ \
  2297  			*(inst)++ = (unsigned char)0x66; \
  2298  		} \
  2299  		x86_64_rex_emit((inst), (size), 0, 0, 0); \
  2300  		x86_64_opcode1_emit((inst), 0xf6, (size)); \
  2301  		x86_64_mem_emit((inst), 0, (mem)); \
  2302  		x86_64_imm_emit_max32((inst), (imm), (size)); \
  2303  	} while (0)
  2304  
  2305  #define x86_64_test_membase_imm_size(inst, basereg, disp, imm, size) \
  2306  	do { \
  2307  		if((size) == 2) \
  2308  		{ \
  2309  			*(inst)++ = (unsigned char)0x66; \
  2310  		} \
  2311  		x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \
  2312  		x86_64_opcode1_emit((inst), 0xf6, (size)); \
  2313  		x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  2314  		x86_64_imm_emit_max32((inst), (imm), (size)); \
  2315  	} while (0)
  2316  
  2317  #define x86_64_test_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  2318  	do { \
  2319  		if((size) == 2) \
  2320  		{ \
  2321  			*(inst)++ = (unsigned char)0x66; \
  2322  		} \
  2323  		x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
  2324  		x86_64_opcode1_emit((inst), 0xf6, (size)); \
  2325  		x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \
  2326  		x86_64_imm_emit_max32((inst), (imm), (size)); \
  2327  	} while (0)
  2328  
  2329  #define x86_64_test_reg_reg_size(inst, dreg, sreg, size) \
  2330  	do { \
  2331  		if((size) == 2) \
  2332  		{ \
  2333  			*(inst)++ = (unsigned char)0x66; \
  2334  		} \
  2335  		x86_64_rex_emit((inst), (size), (sreg), 0, (dreg)); \
  2336  		x86_64_opcode1_emit((inst), 0x84, (size)); \
  2337  		x86_64_reg_emit((inst), (sreg), (dreg)); \
  2338  	} while (0)
  2339  
  2340  #define x86_64_test_regp_reg_size(inst, dregp, sreg, size) \
  2341  	do { \
  2342  		if((size) == 2) \
  2343  		{ \
  2344  			*(inst)++ = (unsigned char)0x66; \
  2345  		} \
  2346  		x86_64_rex_emit((inst), (size), (sreg), 0, (dregp)); \
  2347  		x86_64_opcode1_emit((inst), 0x84, (size)); \
  2348  		x86_64_regp_emit((inst), (sreg), (dregp)); \
  2349  	} while (0)
  2350  
  2351  #define x86_64_test_mem_reg_size(inst, mem, sreg, size) \
  2352  	do { \
  2353  		if((size) == 2) \
  2354  		{ \
  2355  			*(inst)++ = (unsigned char)0x66; \
  2356  		} \
  2357  		x86_64_rex_emit((inst), (size), (sreg), 0, 0); \
  2358  		x86_64_opcode1_emit((inst), 0x84, (size)); \
  2359  		x86_64_mem_emit((inst), (sreg), (mem)); \
  2360  	} while (0)
  2361  
  2362  #define x86_64_test_membase_reg_size(inst, basereg, disp, sreg, size) \
  2363  	do { \
  2364  		if((size) == 2) \
  2365  		{ \
  2366  			*(inst)++ = (unsigned char)0x66; \
  2367  		} \
  2368  		x86_64_rex_emit((inst), (size), (sreg), 0, (basereg)); \
  2369  		x86_64_opcode1_emit((inst), 0x84, (size)); \
  2370  		x86_64_membase_emit((inst), (sreg), (basereg), (disp)); \
  2371  	} while (0)
  2372  
  2373  #define x86_64_test_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  2374  	do { \
  2375  		if((size) == 2) \
  2376  		{ \
  2377  			*(inst)++ = (unsigned char)0x66; \
  2378  		} \
  2379  		x86_64_rex_emit((inst), (size), (sreg), (indexreg), (basereg)); \
  2380  		x86_64_opcode1_emit((inst), 0x84, (size)); \
  2381  		x86_64_memindex_emit((inst), (sreg), (basereg), (disp), (indexreg), (shift)); \
  2382  	} while (0)
  2383  
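        /*
         * A minimal usage sketch: the common zero test of a register against
         * itself; the immediate form above uses the short accumulator
         * encoding (0xa8/0xa9) when the register is X86_64_RAX:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_test_reg_reg_size(inst, X86_64_RAX, X86_64_RAX, 8);	// test rax, rax: ZF == 1 iff RAX == 0
         *	x86_64_test_reg_imm_size(inst, X86_64_RAX, 0xff, 8);		// test rax, 0xff (short form)
         */
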
  2384  /*
  2385   * imul: signed multiply
  2386   */
  2387  #define x86_64_imul_reg_reg_imm_size(inst, dreg, sreg, imm, size) \
  2388  	do { \
  2389  		if((size) == 2) \
  2390  		{ \
  2391  			*(inst)++ = (unsigned char)0x66; \
  2392  		} \
  2393  		x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \
  2394  		if(x86_is_imm8((imm))) \
  2395  		{ \
  2396  			*(inst)++ = (unsigned char)0x6b; \
  2397  			x86_64_reg_emit((inst), (dreg), (sreg)); \
  2398  			x86_imm_emit8((inst), (imm)); \
  2399  		} \
  2400  		else \
  2401  		{ \
  2402  			*(inst)++ = (unsigned char)0x69; \
  2403  			x86_64_reg_emit((inst), (dreg), (sreg)); \
  2404  			switch((size)) \
  2405  			{ \
  2406  				case 2: \
  2407  				{ \
  2408  					x86_imm_emit16(inst, (imm)); \
  2409  				} \
  2410  				break; \
  2411  				case 4: \
  2412  				case 8: \
  2413  				{ \
  2414  					x86_imm_emit32(inst, (imm)); \
  2415  				} \
  2416  				break; \
  2417  			} \
  2418  		} \
  2419  	} while(0)
  2420  
  2421  #define x86_64_imul_reg_regp_imm_size(inst, dreg, sregp, imm, size) \
  2422  	do { \
  2423  		if((size) == 2) \
  2424  		{ \
  2425  			*(inst)++ = (unsigned char)0x66; \
  2426  		} \
  2427  		x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \
  2428  		if(x86_is_imm8((imm))) \
  2429  		{ \
  2430  			*(inst)++ = (unsigned char)0x6b; \
  2431  			x86_64_regp_emit((inst), (dreg), (sregp)); \
  2432  			x86_imm_emit8((inst), (imm)); \
  2433  		} \
  2434  		else \
  2435  		{ \
  2436  			*(inst)++ = (unsigned char)0x69; \
  2437  			x86_64_regp_emit((inst), (dreg), (sregp)); \
  2438  			switch((size)) \
  2439  			{ \
  2440  				case 2: \
  2441  				{ \
  2442  					x86_imm_emit16(inst, (imm)); \
  2443  				} \
  2444  				break; \
  2445  				case 4: \
  2446  				case 8: \
  2447  				{ \
  2448  					x86_imm_emit32(inst, (imm)); \
  2449  				} \
  2450  				break; \
  2451  			} \
  2452  		} \
  2453  	} while(0)
  2454  
  2455  #define x86_64_imul_reg_mem_imm_size(inst, dreg, mem, imm, size) \
  2456  	do { \
  2457  		if((size) == 2) \
  2458  		{ \
  2459  			*(inst)++ = (unsigned char)0x66; \
  2460  		} \
  2461  		x86_64_rex_emit((inst), (size), (dreg), 0, 0); \
  2462  		if(x86_is_imm8((imm))) \
  2463  		{ \
  2464  			*(inst)++ = (unsigned char)0x6b; \
  2465  			x86_64_mem_emit((inst), (dreg), (mem)); \
  2466  			x86_imm_emit8((inst), (imm)); \
  2467  		} \
  2468  		else \
  2469  		{ \
  2470  			*(inst)++ = (unsigned char)0x69; \
  2471  			x86_64_mem_emit((inst), (dreg), (mem)); \
  2472  			switch((size)) \
  2473  			{ \
  2474  				case 2: \
  2475  				{ \
  2476  					x86_imm_emit16(inst, (imm)); \
  2477  				} \
  2478  				break; \
  2479  				case 4: \
  2480  				case 8: \
  2481  				{ \
  2482  					x86_imm_emit32(inst, (imm)); \
  2483  				} \
  2484  				break; \
  2485  			} \
  2486  		} \
  2487  	} while(0)
  2488  
  2489  #define x86_64_imul_reg_membase_imm_size(inst, dreg, basereg, disp, imm, size) \
  2490  	do { \
  2491  		if((size) == 2) \
  2492  		{ \
  2493  			*(inst)++ = (unsigned char)0x66; \
  2494  		} \
  2495  		x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \
  2496  		if(x86_is_imm8((imm))) \
  2497  		{ \
  2498  			*(inst)++ = (unsigned char)0x6b; \
  2499  			x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  2500  			x86_imm_emit8((inst), (imm)); \
  2501  		} \
  2502  		else \
  2503  		{ \
  2504  			*(inst)++ = (unsigned char)0x69; \
  2505  			x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  2506  			switch((size)) \
  2507  			{ \
  2508  				case 2: \
  2509  				{ \
  2510  					x86_imm_emit16(inst, (imm)); \
  2511  				} \
  2512  				break; \
  2513  				case 4: \
  2514  				case 8: \
  2515  				{ \
  2516  					x86_imm_emit32(inst, (imm)); \
  2517  				} \
  2518  				break; \
  2519  			} \
  2520  		} \
  2521  	} while(0)
  2522  
  2523  #define x86_64_imul_reg_memindex_imm_size(inst, dreg, basereg, disp, indexreg, shift, imm, size) \
  2524  	do { \
  2525  		if((size) == 2) \
  2526  		{ \
  2527  			*(inst)++ = (unsigned char)0x66; \
  2528  		} \
  2529  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  2530  		if(x86_is_imm8((imm))) \
  2531  		{ \
  2532  			*(inst)++ = (unsigned char)0x6b; \
  2533  			x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  2534  			x86_imm_emit8((inst), (imm)); \
  2535  		} \
  2536  		else \
  2537  		{ \
  2538  			*(inst)++ = (unsigned char)0x69; \
  2539  			x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  2540  			switch((size)) \
  2541  			{ \
  2542  				case 2: \
  2543  				{ \
  2544  					x86_imm_emit16(inst, (imm)); \
  2545  				} \
  2546  				break; \
  2547  				case 4: \
  2548  				case 8: \
  2549  				{ \
  2550  					x86_imm_emit32(inst, (imm)); \
  2551  				} \
  2552  				break; \
  2553  			} \
  2554  		} \
  2555  	} while(0)
  2556  
  2557  #define x86_64_imul_reg_reg_size(inst, dreg, sreg, size) \
  2558  	do { \
  2559  		if((size) == 2) \
  2560  		{ \
  2561  			*(inst)++ = (unsigned char)0x66; \
  2562  		} \
  2563  		x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \
  2564  		*(inst)++ = (unsigned char)0x0F; \
  2565  		*(inst)++ = (unsigned char)0xAF; \
  2566  		x86_64_reg_emit((inst), (dreg), (sreg)); \
  2567  	} while(0)
  2568  
  2569  #define x86_64_imul_reg_regp_size(inst, dreg, sregp, size) \
  2570  	do { \
  2571  		if((size) == 2) \
  2572  		{ \
  2573  			*(inst)++ = (unsigned char)0x66; \
  2574  		} \
  2575  		x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \
  2576  		*(inst)++ = (unsigned char)0x0F; \
  2577  		*(inst)++ = (unsigned char)0xAF; \
  2578  		x86_64_regp_emit((inst), (dreg), (sregp)); \
  2579  	} while(0)
  2580  
  2581  #define x86_64_imul_reg_mem_size(inst, dreg, mem, size) \
  2582  	do { \
  2583  		if((size) == 2) \
  2584  		{ \
  2585  			*(inst)++ = (unsigned char)0x66; \
  2586  		} \
  2587  		x86_64_rex_emit((inst), (size), (dreg), 0, 0); \
  2588  		*(inst)++ = (unsigned char)0x0F; \
  2589  		*(inst)++ = (unsigned char)0xAF; \
  2590  		x86_64_mem_emit((inst), (dreg), (mem)); \
  2591  	} while(0)
  2592  
  2593  #define x86_64_imul_reg_membase_size(inst, dreg, basereg, disp, size) \
  2594  	do { \
  2595  		if((size) == 2) \
  2596  		{ \
  2597  			*(inst)++ = (unsigned char)0x66; \
  2598  		} \
  2599  		x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \
  2600  		*(inst)++ = (unsigned char)0x0F; \
  2601  		*(inst)++ = (unsigned char)0xAF; \
  2602  		x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  2603  	} while(0)
  2604  
  2605  #define x86_64_imul_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  2606  	do { \
  2607  		if((size) == 2) \
  2608  		{ \
  2609  			*(inst)++ = (unsigned char)0x66; \
  2610  		} \
  2611  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  2612  		*(inst)++ = (unsigned char)0x0F; \
  2613  		*(inst)++ = (unsigned char)0xAF; \
  2614  		x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  2615  	} while(0)
  2616  
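        /*
         * A minimal usage sketch: the three operand form multiplies by an
         * immediate (0x6b with an imm8, 0x69 otherwise), the two operand
         * form (0x0f 0xaf) multiplies two registers:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_imul_reg_reg_imm_size(inst, X86_64_RAX, X86_64_RCX, 10, 8);	// imul rax, rcx, 10
         *	x86_64_imul_reg_reg_size(inst, X86_64_RAX, X86_64_RCX, 8);		// imul rax, rcx
         */
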
  2617  /*
  2618   * cwd, cdq, cqo: sign extend AX/EAX/RAX into DX/EDX/RDX (used to set up the dividend for idiv)
  2619   */
  2620  #define x86_64_cwd(inst) \
  2621  	do { \
  2622  		*(inst)++ = (unsigned char)0x66; \
  2623  		*(inst)++ = (unsigned char)0x99; \
  2624  	} while(0)
  2625  
  2626  #define x86_64_cdq(inst) \
  2627  	do { \
  2628  		*(inst)++ = (unsigned char)0x99; \
  2629  	} while(0)
  2630  
  2631  #define x86_64_cqo(inst) \
  2632  	do { \
  2633  		*(inst)++ = (unsigned char)0x48; \
  2634  		*(inst)++ = (unsigned char)0x99; \
  2635  	} while(0)
  2636  
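        /*
         * A minimal usage sketch: a signed 64 bit division first widens RAX
         * into RDX:RAX with cqo and then divides:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_cqo(inst);				// RDX:RAX = sign extended RAX
         *	x86_64_idiv_reg_size(inst, X86_64_RCX, 8);	// idiv rcx
         */
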
  2637  /*
  2638   * Lea instructions
  2639   */
  2640  #define x86_64_lea_mem_size(inst, dreg, mem, size) \
  2641  	do { \
  2642  		if((size) == 2) \
  2643  		{ \
  2644  			*(inst)++ = (unsigned char)0x66; \
  2645  		} \
  2646  		x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \
  2647  		x86_lea_mem((inst), ((dreg) & 0x7), (mem)); \
  2648  	} while(0)
  2649  
  2650  #define x86_64_lea_membase_size(inst, dreg, basereg, disp, size) \
  2651  	do { \
  2652  		if((size) == 2) \
  2653  		{ \
  2654  			*(inst)++ = (unsigned char)0x66; \
  2655  		} \
  2656  		x86_64_rex_emit(inst, (size), (dreg), 0, (basereg)); \
  2657  		*(inst)++ = (unsigned char)0x8d;	\
  2658  		x86_64_membase_emit((inst), (dreg), (basereg), (disp));	\
  2659  	} while (0)
  2660  
  2661  #define x86_64_lea_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  2662  	do { \
  2663  		if((size) == 2) \
  2664  		{ \
  2665  			*(inst)++ = (unsigned char)0x66; \
  2666  		} \
  2667  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  2668  		*(inst)++ = (unsigned char)0x8d; \
  2669  		x86_64_memindex_emit ((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  2670  	} while(0)
  2671  
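        /*
         * A minimal usage sketch: lea computes an effective address without
         * touching memory, which also makes it handy for cheap three operand
         * arithmetic:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_lea_membase_size(inst, X86_64_RAX, X86_64_RSP, 8, 8);			// lea rax, [rsp+8]
         *	x86_64_lea_memindex_size(inst, X86_64_RAX, X86_64_RDI, 0, X86_64_RSI, 2, 8);	// lea rax, [rdi+rsi*4]
         */
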
  2672  /*
  2673   * Move instructions.
  2674   */
  2675  #define x86_64_mov_reg_reg_size(inst, dreg, sreg, size) \
  2676  	do { \
  2677  		if((size) == 2) \
  2678  		{ \
  2679  			*(inst)++ = (unsigned char)0x66; \
  2680  		} \
  2681  		x86_64_rex_emit(inst, (size), (dreg), 0, (sreg)); \
  2682  		x86_64_opcode1_emit(inst, 0x8a, (size)); \
  2683  		x86_64_reg_emit((inst), ((dreg) & 0x7), ((sreg) & 0x7)); \
  2684  	} while(0)
  2685  
  2686  #define x86_64_mov_regp_reg_size(inst, regp, sreg, size) \
  2687  	do { \
  2688  		if((size) == 2) \
  2689  		{ \
  2690  			*(inst)++ = (unsigned char)0x66; \
  2691  		} \
  2692  		x86_64_rex_emit(inst, (size), (sreg), 0, (regp)); \
  2693  		x86_64_opcode1_emit(inst, 0x88, (size)); \
  2694  		x86_64_regp_emit((inst), (sreg), (regp)); \
  2695  	} while (0)
  2696  
  2697  #define x86_64_mov_membase_reg_size(inst, basereg, disp, sreg, size) \
  2698  	do { \
  2699  		if((size) == 2) \
  2700  		{ \
  2701  			*(inst)++ = (unsigned char)0x66; \
  2702  		} \
  2703  		x86_64_rex_emit(inst, (size), (sreg), 0, (basereg)); \
  2704  		x86_64_opcode1_emit(inst, 0x88, (size)); \
  2705  		x86_64_membase_emit((inst), (sreg), (basereg), (disp));	\
  2706  	} while(0)
  2707  
  2708  #define x86_64_mov_memindex_reg_size(inst, basereg, disp, indexreg, shift, sreg, size) \
  2709  	do { \
  2710  		if((size) == 2) \
  2711  		{ \
  2712  			*(inst)++ = (unsigned char)0x66; \
  2713  		} \
  2714  		x86_64_rex_emit((inst), (size), (sreg), (indexreg), (basereg)); \
  2715  		x86_64_opcode1_emit(inst, 0x88, (size)); \
  2716  		x86_64_memindex_emit((inst), (sreg), (basereg), (disp), (indexreg), (shift)); \
  2717  	} while (0)
  2718  
  2719  /*
  2720   * Only the AX register can be moved to/from a full 64 bit absolute
  2721   * address; for all other registers the address is limited to 32 bits.
  2722   */
  2723  #define x86_64_mov_mem_reg_size(inst, mem, sreg, size) \
  2724  	do { \
  2725  		if((size) == 2) \
  2726  		{ \
  2727  			*(inst)++ = (unsigned char)0x66; \
  2728  		} \
  2729  		x86_64_rex_emit(inst, (size), (sreg), 0, 0); \
  2730  		if((sreg) == X86_64_RAX) \
  2731  		{ \
  2732  			x86_64_opcode1_emit(inst, 0xa2, (size)); \
  2733  			x86_64_imm_emit64(inst, (mem)); \
  2734  		} \
  2735  		else \
  2736  		{ \
  2737  			x86_64_opcode1_emit(inst, 0x88, (size)); \
  2738  			x86_address_byte((inst), 0, ((sreg) & 0x7), 4); \
  2739  			x86_address_byte((inst), 0, 4, 5); \
  2740  			x86_imm_emit32((inst), (mem)); \
  2741  		} \
  2742  	} while (0)
  2743  
  2744  #define x86_64_mov_reg_imm_size(inst, dreg, imm, size) \
  2745  	do { \
  2746  		if((size) == 2) \
  2747  		{ \
  2748  			*(inst)++ = (unsigned char)0x66; \
  2749  		} \
  2750  		x86_64_rex_emit(inst, (size), 0, 0, (dreg)); \
  2751  		switch((size)) \
  2752  		{ \
  2753  			case 1: \
  2754  			{ \
  2755  				*(inst)++ = (unsigned char)0xb0 + ((dreg) & 0x7); \
  2756  				x86_imm_emit8(inst, (imm)); \
  2757  			} \
  2758  			break; \
  2759  			case 2: \
  2760  			{ \
  2761  				*(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \
  2762  				x86_imm_emit16(inst, (imm)); \
  2763  			} \
  2764  			break; \
  2765  			case 4: \
  2766  			{ \
  2767  				*(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \
  2768  				x86_imm_emit32(inst, (imm)); \
  2769  			} \
  2770  			break; \
  2771  			case 8: \
  2772  			{ \
  2773  				jit_nint __x86_64_imm = (imm); \
  2774  				if(__x86_64_imm >= (jit_nint)jit_min_int && __x86_64_imm <= (jit_nint)jit_max_int) \
  2775  				{ \
  2776  					*(inst)++ = (unsigned char)0xc7; \
  2777  					x86_64_reg_emit((inst), 0, (dreg)); \
  2778  					x86_imm_emit32(inst, (__x86_64_imm)); \
  2779  				} \
  2780  				else \
  2781  				{ \
  2782  					*(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \
  2783  					x86_64_imm_emit64(inst, (__x86_64_imm)); \
  2784  				} \
  2785  			} \
  2786  			break; \
  2787  		} \
  2788  	} while(0)
  2789  
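        /*
         * A minimal usage sketch: for size 8 the macro above emits the
         * shorter sign extended 32 bit form (0xc7 /0) when the immediate
         * fits and the full 10 byte mov reg, imm64 otherwise:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_mov_reg_imm_size(inst, X86_64_RAX, 1, 8);	// 7 bytes: mov rax, 1
         *	x86_64_mov_reg_imm_size(inst, X86_64_RAX, (jit_nint)0x1122334455667788LL, 8);	// 10 bytes
         */
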
  2790  /*
  2791   * Only the AX register can be moved to/from a full 64 bit absolute
  2792   * address; for all other registers the address is limited to 32 bits.
  2793   */
  2794  #define x86_64_mov_reg_mem_size(inst, dreg, mem, size) \
  2795  	do { \
  2796  		if((size) == 2) \
  2797  		{ \
  2798  			*(inst)++ = (unsigned char)0x66; \
  2799  		} \
  2800  		x86_64_rex_emit(inst, (size), (dreg), 0, 0); \
  2801  		if((dreg) == X86_64_RAX) \
  2802  		{ \
  2803  			x86_64_opcode1_emit(inst, 0xa0, (size)); \
  2804  			x86_64_imm_emit64(inst, (mem)); \
  2805  		} \
  2806  		else \
  2807  		{ \
  2808  			x86_64_opcode1_emit(inst, 0x8a, (size)); \
  2809  			x86_address_byte ((inst), 0, (dreg), 4); \
  2810  			x86_address_byte ((inst), 0, 4, 5); \
  2811  			x86_imm_emit32 ((inst), (mem)); \
  2812  		} \
  2813  	} while (0)
  2814  
  2815  #define x86_64_mov_reg_regp_size(inst, dreg, sregp, size) \
  2816  	do { \
  2817  		if((size) == 2) \
  2818  		{ \
  2819  			*(inst)++ = (unsigned char)0x66; \
  2820  		} \
  2821  		x86_64_rex_emit(inst, (size), (dreg), 0, (sregp)); \
  2822  		x86_64_opcode1_emit(inst, 0x8a, (size)); \
  2823  		x86_64_regp_emit((inst), (dreg), (sregp)); \
  2824  	} while(0)
  2825  
  2826  #define x86_64_mov_reg_membase_size(inst, dreg, basereg, disp, size) \
  2827  	do { \
  2828  		if((size) == 2) \
  2829  		{ \
  2830  			*(inst)++ = (unsigned char)0x66; \
  2831  		} \
  2832  		x86_64_rex_emit(inst, (size), (dreg), 0, (basereg)); \
  2833  		x86_64_opcode1_emit(inst, 0x8a, (size)); \
  2834  		x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  2835  	} while(0)
  2836  
  2837  
  2838  #define x86_64_mov_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  2839  	do { \
  2840  		if((size) == 2) \
  2841  		{ \
  2842  			*(inst)++ = (unsigned char)0x66; \
  2843  		} \
  2844  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  2845  		x86_64_opcode1_emit(inst, 0x8a, (size)); \
  2846  		x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  2847  	} while(0)
  2848  
  2849  /*
  2850   * Only 32 bit mem and imm values are allowed here.
  2851   * mem is RIP relative.
  2852   * A 32 bit imm is sign extended to 64 bits for the 64 bit size.
  2853   */
  2854  #define x86_64_mov_mem_imm_size(inst, mem, imm, size) \
  2855  	do { \
  2856  		if((size) == 2) \
  2857  		{ \
  2858  			*(inst)++ = (unsigned char)0x66; \
  2859  		} \
  2860  		x86_64_rex_emit((inst), (size), 0, 0, 0); \
  2861  		x86_64_opcode1_emit(inst, 0xc6, (size)); \
  2862  		x86_64_mem_emit((inst), 0, (mem)); \
  2863  		x86_64_imm_emit_max32(inst, (imm), (size)); \
  2864  	} while(0)
  2865  
  2866  #define x86_64_mov_regp_imm_size(inst, dregp, imm, size) \
  2867  	do { \
  2868  		if((size) == 2) \
  2869  		{ \
  2870  			*(inst)++ = (unsigned char)0x66; \
  2871  		} \
  2872  		x86_64_rex_emit(inst, (size), 0, 0, (dregp)); \
  2873  		x86_64_opcode1_emit(inst, 0xc6, (size)); \
  2874  		x86_64_regp_emit((inst), 0, (dregp)); \
  2875  		x86_64_imm_emit_max32(inst, (imm), (size)); \
  2876  	} while(0)
  2877  
  2878  #define x86_64_mov_membase_imm_size(inst, basereg, disp, imm, size) \
  2879  	do { \
  2880  		if((size) == 2) \
  2881  		{ \
  2882  			*(inst)++ = (unsigned char)0x66; \
  2883  		} \
  2884  		x86_64_rex_emit(inst, (size), 0, 0, (basereg)); \
  2885  		x86_64_opcode1_emit(inst, 0xc6, (size)); \
  2886  		x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  2887  		x86_64_imm_emit_max32(inst, (imm), (size)); \
  2888  	} while(0)
  2889  
  2890  #define x86_64_mov_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \
  2891  	do { \
  2892  		if((size) == 2) \
  2893  		{ \
  2894  			*(inst)++ = (unsigned char)0x66; \
  2895  		} \
  2896  		x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \
  2897  		x86_64_opcode1_emit(inst, 0xc6, (size)); \
  2898  		x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \
  2899  		x86_64_imm_emit_max32(inst, (imm), (size)); \
  2900  	} while(0)
  2901  
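        /*
         * A minimal usage sketch: spilling a register to a stack slot and
         * reloading it with the membase forms above:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_mov_membase_reg_size(inst, X86_64_RBP, -8, X86_64_RAX, 8);	// mov [rbp-8], rax
         *	x86_64_mov_reg_membase_size(inst, X86_64_RCX, X86_64_RBP, -8, 8);	// mov rcx, [rbp-8]
         */
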
  2902  /*
  2903   * Move with sign extension to the given size (signed)
  2904   */
  2905  #define x86_64_movsx8_reg_reg_size(inst, dreg, sreg, size) \
  2906  	do { \
  2907  		x86_64_alu2_reg_reg_size((inst), 0x0f, 0xbe, (dreg), (sreg), (size) | 1); \
  2908  	}while(0)
  2909  
  2910  #define x86_64_movsx8_reg_regp_size(inst, dreg, sregp, size) \
  2911  	do { \
  2912  		x86_64_alu2_reg_regp_size((inst), 0x0f, 0xbe, (dreg), (sregp), (size)); \
  2913  	}while(0)
  2914  
  2915  #define x86_64_movsx8_reg_mem_size(inst, dreg, mem, size) \
  2916  	do { \
  2917  		x86_64_alu2_reg_mem_size((inst), 0x0f, 0xbe, (dreg), (mem), (size)); \
  2918  	}while(0)
  2919  
  2920  #define x86_64_movsx8_reg_membase_size(inst, dreg, basereg, disp, size) \
  2921  	do { \
  2922  		x86_64_alu2_reg_membase_size((inst), 0x0f, 0xbe, (dreg), (basereg), (disp), (size)); \
  2923  	}while(0)
  2924  
  2925  #define x86_64_movsx8_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  2926  	do { \
  2927  		x86_64_alu2_reg_memindex_size((inst), 0x0f, 0xbe, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  2928  	}while(0)
  2929  
  2930  #define x86_64_movsx16_reg_reg_size(inst, dreg, sreg, size) \
  2931  	do { \
  2932  		x86_64_alu2_reg_reg_size((inst), 0x0f, 0xbf, (dreg), (sreg), (size)); \
  2933  	}while(0)
  2934  
  2935  #define x86_64_movsx16_reg_regp_size(inst, dreg, sregp, size) \
  2936  	do { \
  2937  		x86_64_alu2_reg_regp_size((inst), 0x0f, 0xbf, (dreg), (sregp), (size)); \
  2938  	}while(0)
  2939  
  2940  #define x86_64_movsx16_reg_mem_size(inst, dreg, mem, size) \
  2941  	do { \
  2942  		x86_64_alu2_reg_mem_size((inst), 0x0f, 0xbf, (dreg), (mem), (size)); \
  2943  	}while(0)
  2944  
  2945  #define x86_64_movsx16_reg_membase_size(inst, dreg, basereg, disp, size) \
  2946  	do { \
  2947  		x86_64_alu2_reg_membase_size((inst), 0x0f, 0xbf, (dreg), (basereg), (disp), (size)); \
  2948  	}while(0)
  2949  
  2950  #define x86_64_movsx16_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  2951  	do { \
  2952  		x86_64_alu2_reg_memindex_size((inst), 0x0f, 0xbf, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  2953  	}while(0)
  2954  
  2955  #define x86_64_movsx32_reg_reg_size(inst, dreg, sreg, size) \
  2956  	do { \
  2957  		x86_64_alu1_reg_reg_size((inst), 0x63, (dreg), (sreg), (size)); \
  2958  	}while(0)
  2959  
  2960  #define x86_64_movsx32_reg_regp_size(inst, dreg, sregp, size) \
  2961  	do { \
  2962  		x86_64_alu1_reg_regp_size((inst), 0x63, (dreg), (sregp), (size)); \
  2963  	}while(0)
  2964  
  2965  #define x86_64_movsx32_reg_mem_size(inst, dreg, mem, size) \
  2966  	do { \
  2967  		x86_64_alu1_reg_mem_size((inst), 0x63, (dreg), (mem), (size)); \
  2968  	}while(0)
  2969  
  2970  #define x86_64_movsx32_reg_membase_size(inst, dreg, basereg, disp, size) \
  2971  	do { \
  2972  		x86_64_alu1_reg_membase_size((inst), 0x63, (dreg), (basereg), (disp), (size)); \
  2973  	}while(0)
  2974  
  2975  #define x86_64_movsx32_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  2976  	do { \
  2977  		x86_64_alu1_reg_memindex_size((inst), 0x63, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  2978  	}while(0)
  2979  
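        /*
         * A minimal usage sketch: loading a narrow signed value into a full
         * register; the 32 -> 64 bit case uses the dedicated movsxd opcode
         * (0x63) instead of a 0x0f prefixed one:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_movsx8_reg_reg_size(inst, X86_64_RAX, X86_64_RCX, 8);	// movsx rax, cl
         *	x86_64_movsx32_reg_reg_size(inst, X86_64_RAX, X86_64_RCX, 8);	// movsxd rax, ecx
         */
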
  2980  /*
  2981   * Move with zero extension to the given size (unsigned)
  2982   */
  2983  #define x86_64_movzx8_reg_reg_size(inst, dreg, sreg, size) \
  2984  	do { \
  2985  		x86_64_alu2_reg_reg_size((inst), 0x0f, 0xb6, (dreg), (sreg), (size) | 1); \
  2986  	}while(0)
  2987  
  2988  #define x86_64_movzx8_reg_regp_size(inst, dreg, sregp, size) \
  2989  	do { \
  2990  		x86_64_alu2_reg_regp_size((inst), 0x0f, 0xb6, (dreg), (sregp), (size)); \
  2991  	}while(0)
  2992  
  2993  #define x86_64_movzx8_reg_mem_size(inst, dreg, mem, size) \
  2994  	do { \
  2995  		x86_64_alu2_reg_mem_size((inst), 0x0f, 0xb6, (dreg), (mem), (size)); \
  2996  	}while(0)
  2997  
  2998  #define x86_64_movzx8_reg_membase_size(inst, dreg, basereg, disp, size) \
  2999  	do { \
  3000  		x86_64_alu2_reg_membase_size((inst), 0x0f, 0xb6, (dreg), (basereg), (disp), (size)); \
  3001  	}while(0)
  3002  
  3003  #define x86_64_movzx8_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  3004  	do { \
  3005  		x86_64_alu2_reg_memindex_size((inst), 0x0f, 0xb6, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  3006  	}while(0)
  3007  
  3008  #define x86_64_movzx16_reg_reg_size(inst, dreg, sreg, size) \
  3009  	do { \
  3010  		x86_64_alu2_reg_reg_size((inst), 0x0f, 0xb7, (dreg), (sreg), (size)); \
  3011  	}while(0)
  3012  
  3013  #define x86_64_movzx16_reg_regp_size(inst, dreg, sregp, size) \
  3014  	do { \
  3015  		x86_64_alu2_reg_regp_size((inst), 0x0f, 0xb7, (dreg), (sregp), (size)); \
  3016  	}while(0)
  3017  
  3018  #define x86_64_movzx16_reg_mem_size(inst, dreg, mem, size) \
  3019  	do { \
  3020  		x86_64_alu2_reg_mem_size((inst), 0x0f, 0xb7, (dreg), (mem), (size)); \
  3021  	}while(0)
  3022  
  3023  #define x86_64_movzx16_reg_membase_size(inst, dreg, basereg, disp, size) \
  3024  	do { \
  3025  		x86_64_alu2_reg_membase_size((inst), 0x0f, 0xb7, (dreg), (basereg), (disp), (size)); \
  3026  	}while(0)
  3027  
  3028  #define x86_64_movzx16_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  3029  	do { \
  3030  		x86_64_alu2_reg_memindex_size((inst), 0x0f, 0xb7, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  3031  	}while(0)
  3032  
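        /*
         * A minimal usage sketch: note that no movzx32 variant is needed,
         * because any 32 bit mov already zeroes the upper half of the
         * destination register:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_movzx8_reg_reg_size(inst, X86_64_RAX, X86_64_RCX, 8);	// movzx rax, cl
         *	x86_64_mov_reg_reg_size(inst, X86_64_RAX, X86_64_RCX, 4);	// mov eax, ecx zero extends
         */
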
  3033  /*
  3034   * cmov: conditional move
  3035   */
  3036  #define x86_64_cmov_reg_reg_size(inst, cond, dreg, sreg, is_signed, size) \
  3037  	do { \
  3038  		if((size) == 2) \
  3039  		{ \
  3040  			*(inst)++ = (unsigned char)0x66; \
  3041  		} \
  3042  		x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \
  3043  		*(inst)++ = (unsigned char)0x0f; \
  3044  		if((is_signed)) \
  3045  		{ \
  3046  			*(inst)++ = x86_cc_signed_map[(cond)] - 0x30; \
  3047  		} \
  3048  		else \
  3049  		{ \
  3050  			*(inst)++ = x86_cc_unsigned_map[(cond)] - 0x30; \
  3051  		} \
  3052  		x86_64_reg_emit((inst), (dreg), (sreg)); \
  3053  	} while (0)
  3054  
  3055  #define x86_64_cmov_reg_regp_size(inst, cond, dreg, sregp, is_signed, size) \
  3056  	do { \
  3057  		if((size) == 2) \
  3058  		{ \
  3059  			*(inst)++ = (unsigned char)0x66; \
  3060  		} \
  3061  		x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \
  3062  		*(inst)++ = (unsigned char)0x0f; \
  3063  		if((is_signed)) \
  3064  		{ \
  3065  			*(inst)++ = x86_cc_signed_map[(cond)] - 0x30; \
  3066  		} \
  3067  		else \
  3068  		{ \
  3069  			*(inst)++ = x86_cc_unsigned_map[(cond)] - 0x30; \
  3070  		} \
  3071  		x86_64_regp_emit((inst), (dreg), (sregp)); \
  3072  	} while (0)
  3073  
  3074  #define x86_64_cmov_reg_mem_size(inst, cond, dreg, mem, is_signed, size) \
  3075  	do { \
  3076  		if((size) == 2) \
  3077  		{ \
  3078  			*(inst)++ = (unsigned char)0x66; \
  3079  		} \
  3080  		x86_64_rex_emit((inst), (size), (dreg), 0, 0); \
  3081  		*(inst)++ = (unsigned char)0x0f; \
  3082  		if((is_signed)) \
  3083  		{ \
  3084  			*(inst)++ = x86_cc_signed_map[(cond)] - 0x30; \
  3085  		} \
  3086  		else \
  3087  		{ \
  3088  			*(inst)++ = x86_cc_unsigned_map[(cond)] - 0x30; \
  3089  		} \
  3090  		x86_64_mem_emit((inst), (dreg), (mem)); \
  3091  	} while (0)
  3092  
  3093  #define x86_64_cmov_reg_membase_size(inst, cond, dreg, basereg, disp, is_signed, size) \
  3094  	do { \
  3095  		if((size) == 2) \
  3096  		{ \
  3097  			*(inst)++ = (unsigned char)0x66; \
  3098  		} \
  3099  		x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \
  3100  		*(inst)++ = (unsigned char)0x0f; \
  3101  		if((is_signed)) \
  3102  		{ \
  3103  			*(inst)++ = x86_cc_signed_map[(cond)] - 0x30; \
  3104  		} \
  3105  		else \
  3106  		{ \
  3107  			*(inst)++ = x86_cc_unsigned_map[(cond)] - 0x30; \
  3108  		} \
  3109  		x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \
  3110  	} while (0)
  3111  
  3112  #define x86_64_cmov_reg_memindex_size(inst, cond, dreg, basereg, disp, indexreg, shift, is_signed, size) \
  3113  	do { \
  3114  		if((size) == 2) \
  3115  		{ \
  3116  			*(inst)++ = (unsigned char)0x66; \
  3117  		} \
  3118  		x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \
  3119  		*(inst)++ = (unsigned char)0x0f; \
  3120  		if((is_signed)) \
  3121  		{ \
  3122  			*(inst)++ = x86_cc_signed_map[(cond)] - 0x30; \
  3123  		} \
  3124  		else \
  3125  		{ \
  3126  			*(inst)++ = x86_cc_unsigned_map[(cond)] - 0x30; \
  3127  		} \
  3128  		x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \
  3129  	} while (0)
  3130  
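        /*
         * A minimal usage sketch: a branch free select; cond is one of the
         * X86_CC_* constants from jit-gen-x86.h and is_signed chooses
         * between the signed and unsigned condition code maps:
         *
         *	unsigned char buf[32], *inst = buf;
         *	x86_64_test_reg_reg_size(inst, X86_64_RAX, X86_64_RAX, 8);		// test rax, rax
         *	x86_64_cmov_reg_reg_size(inst, X86_CC_EQ, X86_64_RAX, X86_64_RCX, 1, 8);	// cmove rax, rcx
         */
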
  3131  /*
  3132   * Stack manipulation instructions (push and pop)
  3133   */
  3134  
  3135  /*
  3136   * Push instructions have a default operand size of 64 bit in 64 bit mode.
  3137   * There is no way to encode a 32 bit push, so only the sizes 8 and 2
  3138   * are allowed.
  3139   */
  3140  #define x86_64_push_reg_size(inst, reg, size) \
  3141  	do { \
  3142  		if((size) == 2) \
  3143  		{ \
  3144  			*(inst)++ = (unsigned char)0x66; \
  3145  		} \
  3146  		x86_64_rex_emit64((inst), (size), 0, 0, (reg)); \
  3147  		*(inst)++ = (unsigned char)0x50 + ((reg) & 0x7); \
  3148  	} while(0)
  3149  
  3150  #define x86_64_push_regp_size(inst, sregp, size) \
  3151  	do { \
  3152  		if((size) == 2) \
  3153  		{ \
  3154  			*(inst)++ = (unsigned char)0x66; \
  3155  		} \
  3156  		x86_64_rex_emit64((inst), (size), 0, 0, (sregp)); \
  3157  		*(inst)++ = (unsigned char)0xff; \
  3158  		x86_64_regp_emit((inst), 6, (sregp)); \
  3159  	} while(0)
  3160  
  3161  #define x86_64_push_mem_size(inst, mem, size) \
  3162  	do { \
  3163  		if((size) == 2) \
  3164  		{ \
  3165  			*(inst)++ = (unsigned char)0x66; \
  3166  		} \
  3167  		x86_64_rex_emit64((inst), (size), 0, 0, 0); \
  3168  		*(inst)++ = (unsigned char)0xff; \
  3169  		x86_64_mem_emit((inst), 6, (mem)); \
  3170  	} while(0)
  3171  
  3172  #define x86_64_push_membase_size(inst, basereg, disp, size) \
  3173  	do { \
  3174  		if((size) == 2) \
  3175  		{ \
  3176  			*(inst)++ = (unsigned char)0x66; \
  3177  		} \
  3178  		x86_64_rex_emit64((inst), (size), 0, 0, (basereg)); \
  3179  		*(inst)++ = (unsigned char)0xff; \
  3180  		x86_64_membase_emit((inst), 6, (basereg), (disp)); \
  3181  	} while(0)
  3182  
  3183  #define x86_64_push_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  3184  	do { \
  3185  		if((size) == 2) \
  3186  		{ \
  3187  			*(inst)++ = (unsigned char)0x66; \
  3188  		} \
  3189  		x86_64_rex_emit64((inst), (size), 0, (indexreg), (basereg)); \
  3190  		*(inst)++ = (unsigned char)0xff; \
  3191  		x86_64_memindex_emit((inst), 6, (basereg), (disp), (indexreg), (shift)); \
  3192  	} while(0)
  3193  
  3194  /*
  3195   * We can push only 32 bit immediate values.
  3196   * The value is sign extended to 64 bit on the stack.
  3197   */
  3198  #define x86_64_push_imm(inst, imm) \
  3199  	do { \
  3200  		int _imm = (int) (imm); \
  3201  		if(x86_is_imm8(_imm)) \
  3202  		{ \
  3203  			*(inst)++ = (unsigned char)0x6A; \
  3204  			x86_imm_emit8 ((inst), (_imm)); \
  3205  		} \
  3206  		else \
  3207  		{ \
  3208  			*(inst)++ = (unsigned char)0x68; \
  3209  			x86_imm_emit32((inst), (_imm)); \
  3210  		} \
  3211  	} while(0)
  3212  
  3213  /*
  3214   * Use this version if you need a specific width for the pushed
  3215   * value. The value on the stack will always be 64 bit wide.
  3216   */
  3217  #define x86_64_push_imm_size(inst, imm, size) \
  3218  	do { \
  3219  		switch(size) \
  3220  		{ \
  3221  			case 1: \
  3222  			{ \
  3223  				*(inst)++ = (unsigned char)0x6A; \
  3224  				x86_imm_emit8((inst), (imm)); \
  3225  			} \
  3226  			break; \
  3227  			case 2: \
  3228  			{ \
  3229  				*(inst)++ = (unsigned char)0x66; \
  3230  				*(inst)++ = (unsigned char)0x68; \
  3231  				x86_imm_emit16((inst), (imm)); \
  3232  			} \
  3233  			break; \
  3234  			case 4: \
  3235  			{ \
  3236  				*(inst)++ = (unsigned char)0x68; \
  3237  				x86_imm_emit32((inst), (imm)); \
  3238  			}\
  3239  		} \
  3240  	} while (0)
  3241  
  3242  
  3243  /*
  3244   * Pop instructions have a default size of 64 bit in 64 bit mode.
  3245   * There is no way to encode a 32 bit pop.
  3246   * So only the sizes 2 and 8 are allowed.
  3247   */
  3248  #define x86_64_pop_reg_size(inst, dreg, size) \
  3249  	do { \
  3250  		if((size) == 2) \
  3251  		{ \
  3252  			*(inst)++ = (unsigned char)0x66; \
  3253  		} \
  3254  		x86_64_rex_emit64((inst), 0, 0, 0, (dreg)); \
  3255  		*(inst)++ = (unsigned char)0x58 + ((dreg) & 0x7); \
  3256  	} while(0)
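
         /*
          * Illustrative encodings (mirroring the push example above):
          *
          *   x86_64_pop_reg_size(inst, X86_64_RBP, 8);   emits: 5d      (pop %rbp)
          *   x86_64_pop_reg_size(inst, X86_64_R12, 8);   emits: 41 5c   (pop %r12)
          */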
  3257  
  3258  #define x86_64_pop_regp_size(inst, dregp, size) \
  3259  	do { \
  3260  		if((size) == 2) \
  3261  		{ \
  3262  			*(inst)++ = (unsigned char)0x66; \
  3263  		} \
  3264  		x86_64_rex_emit64((inst), (size), 0, 0, (dregp)); \
  3265  		*(inst)++ = (unsigned char)0x8f; \
  3266  		x86_64_regp_emit((inst), 0, (dregp)); \
  3267  	} while(0)
  3268  
  3269  #define x86_64_pop_mem_size(inst, mem, size) \
  3270  	do { \
  3271  		if((size) == 2) \
  3272  		{ \
  3273  			*(inst)++ = (unsigned char)0x66; \
  3274  		} \
  3275  		*(inst)++ = (unsigned char)0x8f; \
  3276  		x86_64_mem_emit((inst), 0, (mem)); \
  3277  	} while(0)
  3278  
  3279  #define x86_64_pop_membase_size(inst, basereg, disp, size) \
  3280  	do { \
  3281  		if((size) == 2) \
  3282  		{ \
  3283  			*(inst)++ = (unsigned char)0x66; \
  3284  		} \
   3285  		x86_64_rex_emit64((inst), (size), 0, 0, (basereg)); \
  3286  		*(inst)++ = (unsigned char)0x8f; \
  3287  		x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  3288  	} while(0)
  3289  
  3290  #define x86_64_pop_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  3291  	do { \
  3292  		if((size) == 2) \
  3293  		{ \
  3294  			*(inst)++ = (unsigned char)0x66; \
  3295  		} \
  3296  		x86_64_rex_emit64((inst), (size), 0, (indexreg), (basereg)); \
  3297  		*(inst)++ = (unsigned char)0x8f; \
  3298  		x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \
  3299  	} while(0)
  3300  
  3301  /*
  3302   * control flow change instructions
  3303   */
  3304  
  3305  /*
  3306   * call
  3307   */
  3308  
  3309  /*
  3310   * call_imm is a relative call.
  3311   * imm has to be a 32bit offset from the instruction following the
  3312   * call instruction (absolute - (inst + 5)).
   3313   * For offsets greater than 32bit an indirect call (via register)
  3314   * has to be used.
  3315   */
  3316  #define x86_64_call_imm(inst, imm) \
  3317  	do { \
  3318  		x86_call_imm((inst), (imm)); \
  3319  	} while(0)
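
         /*
          * A hedged sketch of the displacement computation; the target
          * pointer and the register-load step are assumptions, only
          * x86_64_call_imm and x86_64_call_reg are taken from this header:
          *
          *   unsigned char *target = ...;
          *   jit_nint rel = (jit_nint)(target - ((inst) + 5));
          *   if(rel == (jit_nint)(jit_int)rel)
          *   {
          *       x86_64_call_imm(inst, (jit_int)rel);
          *   }
          *   else
          *   {
          *       load the 64 bit address into a scratch register
          *       with one of the mov emitters, then:
          *       x86_64_call_reg(inst, X86_64_RAX);
          *   }
          */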
  3320  
  3321  #define x86_64_call_reg(inst, reg) \
  3322  	do { \
  3323  		x86_64_alu1_reg((inst), 0xff, 2, (reg)); \
  3324  	} while(0)
  3325  
  3326  #define x86_64_call_regp(inst, regp) \
  3327  	do { \
  3328  		x86_64_alu1_regp((inst), 0xff, 2, (regp)); \
  3329  	} while(0)
  3330  
  3331  /*
   3332   * call_mem is an absolute indirect call.
   3333   * To be able to use this instruction the address must be either
   3334   * in the lowest 2GB or in the highest 2GB address range.
  3335   * This is because mem is sign extended to 64bit.
  3336   */
  3337  #define x86_64_call_mem(inst, mem) \
  3338  	do { \
  3339  		x86_64_alu1_mem((inst), 0xff, 2, (mem)); \
  3340  	} while(0)
  3341  
  3342  #define x86_64_call_membase(inst, basereg, disp) \
  3343  	do { \
  3344  		x86_64_alu1_membase((inst), 0xff, 2, (basereg), (disp)); \
  3345  	} while(0)
  3346  
  3347  #define x86_64_call_memindex(inst, basereg, disp, indexreg, shift) \
  3348  	do { \
  3349  		x86_64_alu1_memindex((inst), 0xff, 2, (basereg), (disp), (indexreg), (shift)); \
  3350  	} while(0)
  3351  
  3352  /*
  3353   * jmp
  3354   */
  3355  
  3356  /*
  3357   * unconditional relative jumps
  3358   */
  3359  #define x86_64_jmp_imm8(inst, disp) \
  3360  	do { \
  3361  		*(inst)++ = (unsigned char)0xEB; \
  3362  		x86_imm_emit8((inst), (disp)); \
  3363  	} while(0)
  3364  
  3365  #define x86_64_jmp_imm(inst, disp) \
  3366  	do { \
  3367  		*(inst)++ = (unsigned char)0xE9; \
  3368  		x86_imm_emit32((inst), (disp)); \
  3369  	} while(0)
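
         /*
          * Forward jumps are usually emitted with a zero displacement and
          * back-patched once the target is known.  A hedged sketch:
          *
          *   unsigned char *patch = inst;
          *   x86_64_jmp_imm(inst, 0);      reserves e9 00 00 00 00
          *   ...emit the skipped code...
          *   unsigned char *site = patch + 1;      skip the 0xe9 opcode
          *   x86_imm_emit32(site, (int)(inst - (patch + 5)));
          */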
  3370  
  3371  /*
  3372   * unconditional indirect jumps
  3373   */
  3374  #define x86_64_jmp_reg(inst, reg) \
  3375  	do { \
  3376  		x86_64_alu1_reg((inst), 0xff, 4, (reg)); \
  3377  	} while(0)
  3378  
  3379  #define x86_64_jmp_regp(inst, regp) \
  3380  	do { \
  3381  		x86_64_alu1_regp((inst), 0xff, 4, (regp)); \
  3382  	} while(0)
  3383  
  3384  #define x86_64_jmp_mem(inst, mem) \
  3385  	do { \
  3386  		x86_64_alu1_mem((inst), 0xff, 4, (mem)); \
  3387  	} while(0)
  3388  
  3389  #define x86_64_jmp_membase(inst, basereg, disp) \
  3390  	do { \
  3391  		x86_64_alu1_membase((inst), 0xff, 4, (basereg), (disp)); \
  3392  	} while(0)
  3393  
  3394  #define x86_64_jmp_memindex(inst, basereg, disp, indexreg, shift) \
  3395  	do { \
  3396  		x86_64_alu1_memindex((inst), 0xff, 4, (basereg), (disp), (indexreg), (shift)); \
  3397  	} while(0)
  3398  
  3399  /*
  3400   * Set the low byte in a register to 0x01 if a condition is met
  3401   * or 0x00 otherwise.
  3402   */
  3403  #define x86_64_set_reg(inst, cond, dreg, is_signed) \
  3404  	do { \
  3405  		x86_64_rex_emit((inst), 1, 0, 0, (dreg)); \
  3406  		*(inst)++ = (unsigned char)0x0f; \
  3407  		if((is_signed)) \
  3408  		{ \
  3409  			*(inst)++ = x86_cc_signed_map[(cond)] + 0x20; \
  3410  		} \
  3411  		else \
  3412  		{ \
  3413  			*(inst)++ = x86_cc_unsigned_map[(cond)] + 0x20; \
  3414  		} \
  3415  		x86_64_reg_emit((inst), 0, (dreg)); \
  3416  	} while(0)
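
         /*
          * Illustrative encodings (X86_CC_EQ comes from jit-gen-x86.h):
          *
          *   x86_64_set_reg(inst, X86_CC_EQ, X86_64_RAX, 0);
          *       emits: 40 0f 94 c0   (sete %al; the plain 0x40 REX selects
          *                             the uniform byte registers)
          *   x86_64_set_reg(inst, X86_CC_EQ, X86_64_R9, 0);
          *       emits: 41 0f 94 c1   (sete %r9b)
          *
          * Only the low byte is written; the upper bits of the destination
          * usually need an explicit zero extension afterwards.
          */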
  3417  
  3418  #define x86_64_set_mem(inst, cond, mem, is_signed) \
  3419  	do { \
  3420  		*(inst)++ = (unsigned char)0x0f; \
  3421  		if((is_signed)) \
  3422  		{ \
  3423  			*(inst)++ = x86_cc_signed_map[(cond)] + 0x20; \
  3424  		} \
  3425  		else \
  3426  		{ \
  3427  			*(inst)++ = x86_cc_unsigned_map[(cond)] + 0x20; \
  3428  		} \
  3429  		x86_64_mem_emit((inst), 0, (mem)); \
  3430  	} while(0)
  3431  
  3432  #define x86_64_set_membase(inst, cond, basereg, disp, is_signed) \
  3433  	do { \
  3434  		x86_64_rex_emit((inst), 4, 0, 0, (basereg)); \
  3435  		*(inst)++ = (unsigned char)0x0f; \
  3436  		if((is_signed)) \
  3437  		{ \
  3438  			*(inst)++ = x86_cc_signed_map[(cond)] + 0x20;	\
  3439  		} \
  3440  		else	\
  3441  		{ \
  3442  			*(inst)++ = x86_cc_unsigned_map[(cond)] + 0x20;	\
  3443  		} \
  3444  		x86_64_membase_emit((inst), 0, (basereg), (disp));	\
  3445  	} while(0)
  3446  
  3447  /*
  3448   * ret
  3449   */
  3450  #define x86_64_ret(inst) \
  3451  	do { \
  3452  		x86_ret((inst)); \
  3453  	} while(0)
  3454  
  3455  /*
  3456   * xchg: Exchange values
  3457   */
  3458  #define x86_64_xchg_reg_reg_size(inst, dreg, sreg, size) \
  3459  	do { \
  3460  		if(((size) > 1) && ((dreg) == X86_64_RAX || (sreg) == X86_64_RAX)) \
  3461  		{ \
  3462  			if((size) == 2) \
  3463  			{ \
  3464  				*(inst)++ = (unsigned char)0x66; \
  3465  			} \
  3466  			if((dreg) == X86_64_RAX) \
  3467  			{ \
  3468  				x86_64_rex_emit((inst), (size), 0, 0, (sreg)); \
   3469  				*(inst)++ = (unsigned char)(0x90 + (unsigned char)((sreg) & 0x7)); \
  3470  			} \
  3471  			else \
  3472  			{ \
  3473  				x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \
   3474  				*(inst)++ = (unsigned char)(0x90 + (unsigned char)((dreg) & 0x7)); \
  3475  			} \
  3476  		} \
  3477  		else \
  3478  		{ \
  3479  			if((size) == 1) \
  3480  			{ \
  3481  				x86_64_alu1_reg_reg_size((inst), 0x86, (dreg), (sreg), (size)); \
  3482  			} \
  3483  			else \
  3484  			{ \
  3485  				x86_64_alu1_reg_reg_size((inst), 0x87, (dreg), (sreg), (size)); \
  3486  			} \
  3487  		} \
  3488  	} while(0)
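
         /*
          * When one operand is RAX and the size is above one byte, the
          * short 0x90 + reg form is used.  Illustrative encodings:
          *
          *   x86_64_xchg_reg_reg_size(inst, X86_64_RAX, X86_64_RDX, 8);
          *       emits: 48 92      (xchg %rdx, %rax)
          *   x86_64_xchg_reg_reg_size(inst, X86_64_RCX, X86_64_RDX, 8);
          *       emits: 48 87 ca   (xchg %rcx, %rdx)
          */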
  3489  
  3490  /*
  3491   * XMM instructions
  3492   */
  3493  
  3494  /*
  3495   * xmm instructions with two opcodes
  3496   */
  3497  #define x86_64_xmm2_reg_reg(inst, opc1, opc2, r, reg) \
  3498  	do { \
  3499  		x86_64_rex_emit(inst, 0, (r), 0, (reg)); \
  3500  		*(inst)++ = (unsigned char)(opc1); \
  3501  		*(inst)++ = (unsigned char)(opc2); \
  3502  		x86_64_reg_emit(inst, (r), (reg)); \
  3503  	} while(0)
  3504  
  3505  #define x86_64_xmm2_reg_regp(inst, opc1, opc2, r, regp) \
  3506  	do { \
  3507  		x86_64_rex_emit(inst, 0, (r), 0, (regp)); \
  3508  		*(inst)++ = (unsigned char)(opc1); \
  3509  		*(inst)++ = (unsigned char)(opc2); \
  3510  		x86_64_regp_emit(inst, (r), (regp)); \
  3511  	} while(0)
  3512  
  3513  #define x86_64_xmm2_reg_mem(inst, opc1, opc2, r, mem) \
  3514  	do { \
  3515  		x86_64_rex_emit(inst, 0, (r), 0, 0); \
  3516  		*(inst)++ = (unsigned char)(opc1); \
  3517  		*(inst)++ = (unsigned char)(opc2); \
  3518  		x86_64_mem_emit(inst, (r), (mem)); \
  3519  	} while(0)
  3520  
  3521  #define x86_64_xmm2_reg_membase(inst, opc1, opc2, r, basereg, disp) \
  3522  	do { \
  3523  		x86_64_rex_emit(inst, 0, (r), 0, (basereg)); \
  3524  		*(inst)++ = (unsigned char)(opc1); \
  3525  		*(inst)++ = (unsigned char)(opc2); \
  3526  		x86_64_membase_emit(inst, (r), (basereg), (disp)); \
  3527  	} while(0)
  3528  
  3529  #define x86_64_xmm2_reg_memindex(inst, opc1, opc2, r, basereg, disp, indexreg, shift) \
  3530  	do { \
  3531  		x86_64_rex_emit(inst, 0, (r), (indexreg), (basereg)); \
  3532  		*(inst)++ = (unsigned char)(opc1); \
  3533  		*(inst)++ = (unsigned char)(opc2); \
  3534  		x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \
  3535  	} while(0)
  3536  
  3537  /*
  3538   * xmm instructions with a prefix and two opcodes
  3539   */
  3540  #define x86_64_p1_xmm2_reg_reg_size(inst, p1, opc1, opc2, r, reg, size) \
  3541  	do { \
  3542  		*(inst)++ = (unsigned char)(p1); \
  3543  		x86_64_rex_emit(inst, (size), (r), 0, (reg)); \
  3544  		*(inst)++ = (unsigned char)(opc1); \
  3545  		*(inst)++ = (unsigned char)(opc2); \
  3546  		x86_64_reg_emit(inst, (r), (reg)); \
  3547  	} while(0)
  3548  
  3549  #define x86_64_p1_xmm2_reg_regp_size(inst, p1, opc1, opc2, r, regp, size) \
  3550  	do { \
  3551  		*(inst)++ = (unsigned char)(p1); \
  3552  		x86_64_rex_emit(inst, (size), (r), 0, (regp)); \
  3553  		*(inst)++ = (unsigned char)(opc1); \
  3554  		*(inst)++ = (unsigned char)(opc2); \
  3555  		x86_64_regp_emit(inst, (r), (regp)); \
  3556  	} while(0)
  3557  
  3558  #define x86_64_p1_xmm2_reg_mem_size(inst, p1, opc1, opc2, r, mem, size) \
  3559  	do { \
  3560  		*(inst)++ = (unsigned char)(p1); \
  3561  		x86_64_rex_emit(inst, (size), (r), 0, 0); \
  3562  		*(inst)++ = (unsigned char)(opc1); \
  3563  		*(inst)++ = (unsigned char)(opc2); \
  3564  		x86_64_mem_emit(inst, (r), (mem)); \
  3565  	} while(0)
  3566  
  3567  #define x86_64_p1_xmm2_reg_membase_size(inst, p1, opc1, opc2, r, basereg, disp, size) \
  3568  	do { \
  3569  		*(inst)++ = (unsigned char)(p1); \
  3570  		x86_64_rex_emit(inst, (size), (r), 0, (basereg)); \
  3571  		*(inst)++ = (unsigned char)(opc1); \
  3572  		*(inst)++ = (unsigned char)(opc2); \
  3573  		x86_64_membase_emit(inst, (r), (basereg), (disp)); \
  3574  	} while(0)
  3575  
  3576  #define x86_64_p1_xmm2_reg_memindex_size(inst, p1, opc1, opc2, r, basereg, disp, indexreg, shift, size) \
  3577  	do { \
  3578  		*(inst)++ = (unsigned char)(p1); \
  3579  		x86_64_rex_emit(inst, (size), (r), (indexreg), (basereg)); \
  3580  		*(inst)++ = (unsigned char)(opc1); \
  3581  		*(inst)++ = (unsigned char)(opc2); \
  3582  		x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \
  3583  	} while(0)
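
         /*
          * Note the ordering: the mandatory prefix p1 is emitted before the
          * REX byte, which in turn precedes the two opcode bytes.  An
          * illustrative encoding:
          *
          *   x86_64_p1_xmm2_reg_reg_size(inst, 0xf2, 0x0f, 0x58, X86_64_XMM1, X86_64_XMM9, 0);
          *       emits: f2 41 0f 58 c9   (addsd %xmm9, %xmm1)
          */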
  3584  
  3585  /*
  3586   * xmm instructions with a prefix and three opcodes
  3587   */
  3588  #define x86_64_p1_xmm3_reg_reg_size(inst, p1, opc1, opc2, opc3, r, reg, size) \
  3589  	do { \
  3590  		*(inst)++ = (unsigned char)(p1); \
  3591  		x86_64_rex_emit(inst, (size), (r), 0, (reg)); \
  3592  		*(inst)++ = (unsigned char)(opc1); \
  3593  		*(inst)++ = (unsigned char)(opc2); \
  3594  		*(inst)++ = (unsigned char)(opc3); \
  3595  		x86_64_reg_emit(inst, (r), (reg)); \
  3596  	} while(0)
  3597  
  3598  #define x86_64_p1_xmm3_reg_regp_size(inst, p1, opc1, opc2, opc3, r, regp, size) \
  3599  	do { \
  3600  		*(inst)++ = (unsigned char)(p1); \
  3601  		x86_64_rex_emit(inst, (size), (r), 0, (regp)); \
  3602  		*(inst)++ = (unsigned char)(opc1); \
  3603  		*(inst)++ = (unsigned char)(opc2); \
  3604  		*(inst)++ = (unsigned char)(opc3); \
  3605  		x86_64_regp_emit(inst, (r), (regp)); \
  3606  	} while(0)
  3607  
  3608  #define x86_64_p1_xmm3_reg_mem_size(inst, p1, opc1, opc2, opc3, r, mem, size) \
  3609  	do { \
  3610  		*(inst)++ = (unsigned char)(p1); \
  3611  		x86_64_rex_emit(inst, (size), (r), 0, 0); \
  3612  		*(inst)++ = (unsigned char)(opc1); \
  3613  		*(inst)++ = (unsigned char)(opc2); \
  3614  		*(inst)++ = (unsigned char)(opc3); \
  3615  		x86_64_mem_emit(inst, (r), (mem)); \
  3616  	} while(0)
  3617  
  3618  #define x86_64_p1_xmm3_reg_membase_size(inst, p1, opc1, opc2, opc3, r, basereg, disp, size) \
  3619  	do { \
  3620  		*(inst)++ = (unsigned char)(p1); \
  3621  		x86_64_rex_emit(inst, (size), (r), 0, (basereg)); \
  3622  		*(inst)++ = (unsigned char)(opc1); \
  3623  		*(inst)++ = (unsigned char)(opc2); \
  3624  		*(inst)++ = (unsigned char)(opc3); \
  3625  		x86_64_membase_emit(inst, (r), (basereg), (disp)); \
  3626  	} while(0)
  3627  
  3628  #define x86_64_p1_xmm3_reg_memindex_size(inst, p1, opc1, opc2, opc3, r, basereg, disp, indexreg, shift, size) \
  3629  	do { \
  3630  		*(inst)++ = (unsigned char)(p1); \
  3631  		x86_64_rex_emit(inst, (size), (r), (indexreg), (basereg)); \
  3632  		*(inst)++ = (unsigned char)(opc1); \
  3633  		*(inst)++ = (unsigned char)(opc2); \
  3634  		*(inst)++ = (unsigned char)(opc3); \
  3635  		x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \
  3636  	} while(0)
  3637  
  3638  /*
   3639   * xmm1: Macros for use with the X86_64_XMM1 opcode enum
  3640   */
  3641  #define x86_64_xmm1_reg_reg(inst, opc, dreg, sreg, is_double) \
  3642  	do { \
  3643  		x86_64_p1_xmm2_reg_reg_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (sreg), 0); \
  3644  	} while(0)
  3645  
  3646  #define x86_64_xmm1_reg_regp(inst, opc, dreg, sregp, is_double) \
  3647  	do { \
  3648  		x86_64_p1_xmm2_reg_regp_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (sregp), 0); \
  3649  	} while(0)
  3650  
  3651  #define x86_64_xmm1_reg_mem(inst, opc, dreg, mem, is_double) \
  3652  	do { \
  3653  		x86_64_p1_xmm2_reg_mem_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (mem), 0); \
  3654  	} while(0)
  3655  
  3656  #define x86_64_xmm1_reg_membase(inst, opc, dreg, basereg, disp, is_double) \
  3657  	do { \
  3658  		x86_64_p1_xmm2_reg_membase_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (basereg), (disp), 0); \
  3659  	} while(0)
  3660  
  3661  #define x86_64_xmm1_reg_memindex(inst, opc, dreg, basereg, disp, indexreg, shift, is_double) \
  3662  	do { \
  3663  		x86_64_p1_xmm2_reg_memindex_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  3664  	} while(0)
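
         /*
          * The is_double flag only selects the prefix.  Illustrative:
          *
          *   x86_64_xmm1_reg_reg(inst, 0x58, X86_64_XMM0, X86_64_XMM1, 0);
          *       emits: f3 0f 58 c1   (addss %xmm1, %xmm0)
          *   x86_64_xmm1_reg_reg(inst, 0x58, X86_64_XMM0, X86_64_XMM1, 1);
          *       emits: f2 0f 58 c1   (addsd %xmm1, %xmm0)
          */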
  3665  
  3666  /*
  3667   * Load and store MXCSR register state
  3668   */
  3669  
  3670  /*
  3671   * ldmxcsr: Load MXCSR register
  3672   */
  3673  #define x86_64_ldmxcsr_regp(inst, sregp) \
  3674  	do { \
  3675  		x86_64_xmm2_reg_regp((inst), 0x0f, 0xae, 2, (sregp)); \
  3676  	} while(0)
  3677  
  3678  #define x86_64_ldmxcsr_mem(inst, mem) \
  3679  	do { \
  3680  		x86_64_xmm2_reg_mem((inst), 0x0f, 0xae, 2, (mem)); \
  3681  	} while(0)
  3682  
  3683  #define x86_64_ldmxcsr_membase(inst, basereg, disp) \
  3684  	do { \
  3685  		x86_64_xmm2_reg_membase((inst), 0x0f, 0xae, 2, (basereg), (disp)); \
  3686  	} while(0)
  3687  
  3688  #define x86_64_ldmxcsr_memindex(inst, basereg, disp, indexreg, shift) \
  3689  	do { \
  3690  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0xae, 2, (basereg), (disp), (indexreg), (shift)); \
  3691  	} while(0)
  3692  
  3693  /*
  3694   * stmxcsr: Store MXCSR register
  3695   */
  3696  #define x86_64_stmxcsr_regp(inst, sregp) \
  3697  	do { \
  3698  		x86_64_xmm2_reg_regp((inst), 0x0f, 0xae, 3, (sregp)); \
  3699  	} while(0)
  3700  
  3701  #define x86_64_stmxcsr_mem(inst, mem) \
  3702  	do { \
  3703  		x86_64_xmm2_reg_mem((inst), 0x0f, 0xae, 3, (mem)); \
  3704  	} while(0)
  3705  
  3706  #define x86_64_stmxcsr_membase(inst, basereg, disp) \
  3707  	do { \
  3708  		x86_64_xmm2_reg_membase((inst), 0x0f, 0xae, 3, (basereg), (disp)); \
  3709  	} while(0)
  3710  
  3711  #define x86_64_stmxcsr_memindex(inst, basereg, disp, indexreg, shift) \
  3712  	do { \
  3713  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0xae, 3, (basereg), (disp), (indexreg), (shift)); \
  3714  	} while(0)
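
         /*
          * A hedged round-trip sketch; the stack slot at -4(%rbp) is an
          * assumption:
          *
          *   x86_64_stmxcsr_membase(inst, X86_64_RBP, -4);   save MXCSR
          *   ...load, modify and spill the value with integer emitters...
          *   x86_64_ldmxcsr_membase(inst, X86_64_RBP, -4);   reload MXCSR
          */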
  3715  
  3716  /*
  3717   * Move instructions
  3718   */
  3719  
  3720  /*
  3721   * movd: Move doubleword from/to xmm register
  3722   */
  3723  #define x86_64_movd_xreg_reg(inst, dreg, sreg) \
  3724  	do { \
  3725  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sreg), 4); \
  3726  	} while(0)
  3727  
  3728  #define x86_64_movd_xreg_mem(inst, dreg, mem) \
  3729  	do { \
  3730  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x6e, (dreg), (mem), 4); \
  3731  	} while(0)
  3732  
  3733  #define x86_64_movd_xreg_regp(inst, dreg, sregp) \
  3734  	do { \
  3735  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sregp), 4); \
  3736  	} while(0)
  3737  
  3738  #define x86_64_movd_xreg_membase(inst, dreg, basereg, disp) \
  3739  	do { \
  3740  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), 4); \
  3741  	} while(0)
  3742  
  3743  #define x86_64_movd_xreg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  3744  	do { \
  3745  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), (indexreg), (shift), 4); \
  3746  	} while(0)
  3747  
  3748  #define x86_64_movd_reg_xreg(inst, dreg, sreg) \
  3749  	do { \
  3750  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dreg), 4); \
  3751  	} while(0)
  3752  
  3753  #define x86_64_movd_mem_xreg(inst, mem, sreg) \
  3754  	do { \
  3755  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x7e, (sreg), (mem), 4); \
  3756  	} while(0)
  3757  
  3758  #define x86_64_movd_regp_xreg(inst, dregp, sreg) \
  3759  	do { \
  3760  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dregp), 4); \
  3761  	} while(0)
  3762  
  3763  #define x86_64_movd_membase_xreg(inst, basereg, disp, sreg) \
  3764  	do { \
  3765  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), 4); \
  3766  	} while(0)
  3767  
  3768  #define x86_64_movd_memindex_xreg(inst, basereg, disp, indexreg, shift, sreg) \
  3769  	do { \
  3770  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), (indexreg), (shift), 4); \
  3771  	} while(0)
  3772  
  3773  /*
  3774   * movq: Move quadword from/to xmm register
  3775   */
  3776  #define x86_64_movq_xreg_reg(inst, dreg, sreg) \
  3777  	do { \
  3778  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sreg), 8); \
  3779  	} while(0)
  3780  
  3781  #define x86_64_movq_xreg_mem(inst, dreg, mem) \
  3782  	do { \
  3783  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x6e, (dreg), (mem), 8); \
  3784  	} while(0)
  3785  
  3786  #define x86_64_movq_xreg_regp(inst, dreg, sregp) \
  3787  	do { \
  3788  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sregp), 8); \
  3789  	} while(0)
  3790  
  3791  #define x86_64_movq_xreg_membase(inst, dreg, basereg, disp) \
  3792  	do { \
  3793  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), 8); \
  3794  	} while(0)
  3795  
  3796  #define x86_64_movq_xreg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  3797  	do { \
  3798  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), (indexreg), (shift), 8); \
  3799  	} while(0)
  3800  
  3801  #define x86_64_movq_reg_xreg(inst, dreg, sreg) \
  3802  	do { \
  3803  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dreg), 8); \
  3804  	} while(0)
  3805  
  3806  #define x86_64_movq_mem_xreg(inst, mem, sreg) \
  3807  	do { \
  3808  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x7e, (sreg), (mem), 8); \
  3809  	} while(0)
  3810  
  3811  #define x86_64_movq_regp_xreg(inst, dregp, sreg) \
  3812  	do { \
  3813  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dregp), 8); \
  3814  	} while(0)
  3815  
  3816  #define x86_64_movq_membase_xreg(inst, basereg, disp, sreg) \
  3817  	do { \
  3818  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), 8); \
  3819  	} while(0)
  3820  
  3821  #define x86_64_movq_memindex_xreg(inst, basereg, disp, indexreg, shift, sreg) \
  3822  	do { \
  3823  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), (indexreg), (shift), 8); \
  3824  	} while(0)
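
         /*
          * movd and movq differ only in REX.W.  Illustrative encodings:
          *
          *   x86_64_movd_xreg_reg(inst, X86_64_XMM0, X86_64_RAX);
          *       emits: 66 0f 6e c0      (movd %eax, %xmm0)
          *   x86_64_movq_xreg_reg(inst, X86_64_XMM0, X86_64_RAX);
          *       emits: 66 48 0f 6e c0   (movq %rax, %xmm0)
          */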
  3825  
  3826  /*
   3827   * movaps: Move aligned packed single precision values (16 bytes)
  3828   */
  3829  #define x86_64_movaps_reg_reg(inst, dreg, sreg) \
  3830  	do { \
  3831  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x28, (dreg), (sreg)); \
  3832  	} while(0)
  3833  
  3834  #define x86_64_movaps_regp_reg(inst, dregp, sreg) \
  3835  	do { \
  3836  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x29, (sreg), (dregp)); \
  3837  	} while(0)
  3838  
  3839  #define x86_64_movaps_mem_reg(inst, mem, sreg) \
  3840  	do { \
  3841  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x29, (sreg), (mem)); \
  3842  	} while(0)
  3843  
  3844  #define x86_64_movaps_membase_reg(inst, basereg, disp, sreg) \
  3845  	do { \
  3846  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x29, (sreg), (basereg), (disp)); \
  3847  	} while(0)
  3848  
  3849  #define x86_64_movaps_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \
  3850  	do { \
  3851  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x29, (sreg), (basereg), (disp), (indexreg), (shift)); \
  3852  	} while(0)
  3853  
  3854  #define x86_64_movaps_reg_regp(inst, dreg, sregp) \
  3855  	do { \
  3856  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x28, (dreg), (sregp)); \
  3857  	} while(0)
  3858  
  3859  #define x86_64_movaps_reg_mem(inst, dreg, mem) \
  3860  	do { \
  3861  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x28, (dreg), (mem)); \
  3862  	} while(0)
  3863  
  3864  #define x86_64_movaps_reg_membase(inst, dreg, basereg, disp) \
  3865  	do { \
  3866  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x28, (dreg), (basereg), (disp)); \
  3867  	} while(0)
  3868  
  3869  #define x86_64_movaps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  3870  	do { \
  3871  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x28, (dreg), (basereg), (disp), (indexreg), (shift)); \
  3872  	} while(0)
  3873  
  3874  /*
   3875   * movups: Move unaligned packed single precision values (16 bytes)
  3876   */
  3877  #define x86_64_movups_reg_reg(inst, dreg, sreg) \
  3878  	do { \
  3879  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x10, (dreg), (sreg)); \
  3880  	} while(0)
  3881  
  3882  #define x86_64_movups_regp_reg(inst, dregp, sreg) \
  3883  	do { \
  3884  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x11, (sreg), (dregp)); \
  3885  	} while(0)
  3886  
  3887  #define x86_64_movups_mem_reg(inst, mem, sreg) \
  3888  	do { \
  3889  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x11, (sreg), (mem)); \
  3890  	} while(0)
  3891  
  3892  #define x86_64_movups_membase_reg(inst, basereg, disp, sreg) \
  3893  	do { \
  3894  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x11, (sreg), (basereg), (disp)); \
  3895  	} while(0)
  3896  
  3897  #define x86_64_movups_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \
  3898  	do { \
  3899  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift)); \
  3900  	} while(0)
  3901  
  3902  #define x86_64_movups_reg_regp(inst, dreg, sregp) \
  3903  	do { \
  3904  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x10, (dreg), (sregp)); \
  3905  	} while(0)
  3906  
  3907  #define x86_64_movups_reg_mem(inst, dreg, mem) \
  3908  	do { \
  3909  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x10, (dreg), (mem)); \
  3910  	} while(0)
  3911  
  3912  #define x86_64_movups_reg_membase(inst, dreg, basereg, disp) \
  3913  	do { \
  3914  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x10, (dreg), (basereg), (disp)); \
  3915  	} while(0)
  3916  
  3917  #define x86_64_movups_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  3918  	do { \
  3919  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift)); \
  3920  	} while(0)
  3921  
  3922  /*
  3923   * movlhps: Move lower 64bit of sreg to higher 64bit of dreg
  3924   * movhlps: Move higher 64bit of sreg to lower 64bit of dreg
  3925   */
  3926  #define x86_64_movlhps(inst, dreg, sreg) \
  3927  	do { \
  3928  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x16, (dreg), (sreg)); \
  3929  	} while(0)
  3930  #define x86_64_movhlps(inst, dreg, sreg) \
  3931  	do { \
  3932  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x12, (dreg), (sreg)); \
  3933  	} while(0)
  3934  
  3935  /*
  3936   * movsd: Move scalar double (64bit float)
  3937   */
  3938  #define x86_64_movsd_reg_reg(inst, dreg, sreg) \
  3939  	do { \
  3940  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x10, (dreg), (sreg), 0); \
  3941  	} while(0)
  3942  
  3943  #define x86_64_movsd_regp_reg(inst, dregp, sreg) \
  3944  	do { \
  3945  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x11, (sreg), (dregp), 0); \
  3946  	} while(0)
  3947  
  3948  #define x86_64_movsd_mem_reg(inst, mem, sreg) \
  3949  	do { \
  3950  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x11, (sreg), (mem), 0); \
  3951  	} while(0)
  3952  
  3953  #define x86_64_movsd_membase_reg(inst, basereg, disp, sreg) \
  3954  	do { \
  3955  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x11, (sreg), (basereg), (disp), 0); \
  3956  	} while(0)
  3957  
  3958  #define x86_64_movsd_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \
  3959  	do { \
  3960  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift), 0); \
  3961  	} while(0)
  3962  
  3963  #define x86_64_movsd_reg_regp(inst, dreg, sregp) \
  3964  	do { \
  3965  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x10, (dreg), (sregp), 0); \
  3966  	} while(0)
  3967  
  3968  #define x86_64_movsd_reg_mem(inst, dreg, mem) \
  3969  	do { \
  3970  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x10, (dreg), (mem), 0); \
  3971  	} while(0)
  3972  
  3973  #define x86_64_movsd_reg_membase(inst, dreg, basereg, disp) \
  3974  	do { \
  3975  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x10, (dreg), (basereg), (disp), 0); \
  3976  	} while(0)
  3977  
  3978  #define x86_64_movsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  3979  	do { \
  3980  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  3981  	} while(0)
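
         /*
          * Illustrative load/store pair for a double at -8(%rbp):
          *
          *   x86_64_movsd_reg_membase(inst, X86_64_XMM0, X86_64_RBP, -8);
          *       emits: f2 0f 10 45 f8   (movsd -8(%rbp), %xmm0)
          *   x86_64_movsd_membase_reg(inst, X86_64_RBP, -8, X86_64_XMM0);
          *       emits: f2 0f 11 45 f8   (movsd %xmm0, -8(%rbp))
          */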
  3982  
  3983  /*
  3984   * movss: Move scalar single (32bit float)
  3985   */
  3986  #define x86_64_movss_reg_reg(inst, dreg, sreg) \
  3987  	do { \
  3988  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x10, (dreg), (sreg), 0); \
  3989  	} while(0)
  3990  
  3991  #define x86_64_movss_regp_reg(inst, dregp, sreg) \
  3992  	do { \
  3993  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x11, (sreg), (dregp), 0); \
  3994  	} while(0)
  3995  
  3996  #define x86_64_movss_mem_reg(inst, mem, sreg) \
  3997  	do { \
  3998  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x11, (sreg), (mem), 0); \
  3999  	} while(0)
  4000  
  4001  #define x86_64_movss_membase_reg(inst, basereg, disp, sreg) \
  4002  	do { \
  4003  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x11, (sreg), (basereg), (disp), 0); \
  4004  	} while(0)
  4005  
  4006  #define x86_64_movss_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \
  4007  	do { \
  4008  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift), 0); \
  4009  	} while(0)
  4010  
  4011  #define x86_64_movss_reg_regp(inst, dreg, sregp) \
  4012  	do { \
  4013  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x10, (dreg), (sregp), 0); \
  4014  	} while(0)
  4015  
  4016  #define x86_64_movss_reg_mem(inst, dreg, mem) \
  4017  	do { \
  4018  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x10, (dreg), (mem), 0); \
  4019  	} while(0)
  4020  
  4021  #define x86_64_movss_reg_membase(inst, dreg, basereg, disp) \
  4022  	do { \
  4023  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x10, (dreg), (basereg), (disp), 0); \
  4024  	} while(0)
  4025  
  4026  #define x86_64_movss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4027  	do { \
  4028  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4029  	} while(0)
  4030  
  4031  /*
  4032   * Conversion opcodes
  4033   */
  4034  
  4035  /*
  4036   * cvtsi2ss: Convert signed integer to float32
  4037   * The size is the size of the integer value (4 or 8)
  4038   */
  4039  #define x86_64_cvtsi2ss_reg_reg_size(inst, dxreg, sreg, size) \
  4040  	do { \
  4041  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x2a, (dxreg), (sreg), (size)); \
  4042  	} while(0)
  4043  
  4044  #define x86_64_cvtsi2ss_reg_regp_size(inst, dxreg, sregp, size) \
  4045  	do { \
  4046  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x2a, (dxreg), (sregp), (size)); \
  4047  	} while(0)
  4048  
  4049  #define x86_64_cvtsi2ss_reg_mem_size(inst, dxreg, mem, size) \
  4050  	do { \
  4051  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x2a, (dxreg), (mem), (size)); \
  4052  	} while(0)
  4053  
  4054  #define x86_64_cvtsi2ss_reg_membase_size(inst, dreg, basereg, disp, size) \
  4055  	do { \
  4056  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x2a, (dreg), (basereg), (disp), (size)); \
  4057  	} while(0)
  4058  
  4059  #define x86_64_cvtsi2ss_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  4060  	do { \
  4061  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x2a, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  4062  	} while(0)
  4063  
  4064  /*
  4065   * cvtsi2sd: Convert signed integer to float64
  4066   * The size is the size of the integer value (4 or 8)
  4067   */
  4068  #define x86_64_cvtsi2sd_reg_reg_size(inst, dxreg, sreg, size) \
  4069  	do { \
  4070  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x2a, (dxreg), (sreg), (size)); \
  4071  	} while(0)
  4072  
  4073  #define x86_64_cvtsi2sd_reg_regp_size(inst, dxreg, sregp, size) \
  4074  	do { \
  4075  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x2a, (dxreg), (sregp), (size)); \
  4076  	} while(0)
  4077  
  4078  #define x86_64_cvtsi2sd_reg_mem_size(inst, dxreg, mem, size) \
  4079  	do { \
  4080  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x2a, (dxreg), (mem), (size)); \
  4081  	} while(0)
  4082  
  4083  #define x86_64_cvtsi2sd_reg_membase_size(inst, dreg, basereg, disp, size) \
  4084  	do { \
  4085  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x2a, (dreg), (basereg), (disp), (size)); \
  4086  	} while(0)
  4087  
  4088  #define x86_64_cvtsi2sd_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  4089  	do { \
  4090  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x2a, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  4091  	} while(0)
  4092  
  4093  /*
   4094   * cvtss2si: Convert float32 to a signed integer using the rounding mode
  4095   * in the mxcsr register
  4096   * The size is the size of the integer value (4 or 8)
  4097   */
  4098  #define x86_64_cvtss2si_reg_reg_size(inst, dreg, sxreg, size) \
  4099  	do { \
  4100  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x2d, (dreg), (sxreg), (size)); \
  4101  	} while(0)
  4102  
  4103  #define x86_64_cvtss2si_reg_regp_size(inst, dreg, sregp, size) \
  4104  	do { \
  4105  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x2d, (dreg), (sregp), (size)); \
  4106  	} while(0)
  4107  
  4108  #define x86_64_cvtss2si_reg_mem_size(inst, dreg, mem, size) \
  4109  	do { \
  4110  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x2d, (dreg), (mem), (size)); \
  4111  	} while(0)
  4112  
  4113  #define x86_64_cvtss2si_reg_membase_size(inst, dreg, basereg, disp, size) \
  4114  	do { \
  4115  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x2d, (dreg), (basereg), (disp), (size)); \
  4116  	} while(0)
  4117  
  4118  #define x86_64_cvtss2si_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  4119  	do { \
  4120  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x2d, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  4121  	} while(0)
  4122  
  4123  /*
   4124   * cvttss2si: Convert float32 to a signed integer using the truncate rounding mode.
  4125   * The size is the size of the integer value (4 or 8)
  4126   */
  4127  #define x86_64_cvttss2si_reg_reg_size(inst, dreg, sxreg, size) \
  4128  	do { \
  4129  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x2c, (dreg), (sxreg), (size)); \
  4130  	} while(0)
  4131  
  4132  #define x86_64_cvttss2si_reg_regp_size(inst, dreg, sregp, size) \
  4133  	do { \
  4134  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x2c, (dreg), (sregp), (size)); \
  4135  	} while(0)
  4136  
  4137  #define x86_64_cvttss2si_reg_mem_size(inst, dreg, mem, size) \
  4138  	do { \
  4139  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x2c, (dreg), (mem), (size)); \
  4140  	} while(0)
  4141  
  4142  #define x86_64_cvttss2si_reg_membase_size(inst, dreg, basereg, disp, size) \
  4143  	do { \
  4144  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x2c, (dreg), (basereg), (disp), (size)); \
  4145  	} while(0)
  4146  
  4147  #define x86_64_cvttss2si_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  4148  	do { \
  4149  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x2c, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  4150  	} while(0)
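
         /*
          * The 0x2d/0x2c opcode pair differs only in the rounding behavior.
          * Illustrative encodings:
          *
          *   x86_64_cvtss2si_reg_reg_size(inst, X86_64_RAX, X86_64_XMM0, 4);
          *       emits: f3 0f 2d c0      (cvtss2si %xmm0, %eax; rounds per MXCSR)
          *   x86_64_cvttss2si_reg_reg_size(inst, X86_64_RAX, X86_64_XMM0, 8);
          *       emits: f3 48 0f 2c c0   (cvttss2si %xmm0, %rax; truncates,
          *                                matching the C cast semantics)
          */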
  4151  
  4152  /*
  4153   * cvtsd2si: Convert float64 to a signed integer using the rounding mode
  4154   * in the mxcsr register
  4155   * The size is the size of the integer value (4 or 8)
  4156   */
  4157  #define x86_64_cvtsd2si_reg_reg_size(inst, dreg, sxreg, size) \
  4158  	do { \
  4159  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x2d, (dreg), (sxreg), (size)); \
  4160  	} while(0)
  4161  
  4162  #define x86_64_cvtsd2si_reg_regp_size(inst, dreg, sregp, size) \
  4163  	do { \
  4164  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x2d, (dreg), (sregp), (size)); \
  4165  	} while(0)
  4166  
  4167  #define x86_64_cvtsd2si_reg_mem_size(inst, dreg, mem, size) \
  4168  	do { \
  4169  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x2d, (dreg), (mem), (size)); \
  4170  	} while(0)
  4171  
  4172  #define x86_64_cvtsd2si_reg_membase_size(inst, dreg, basereg, disp, size) \
  4173  	do { \
  4174  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x2d, (dreg), (basereg), (disp), (size)); \
  4175  	} while(0)
  4176  
  4177  #define x86_64_cvtsd2si_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  4178  	do { \
  4179  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x2d, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  4180  	} while(0)
  4181  
  4182  /*
  4183   * cvttsd2si: Convert float64 to a signed integer using the truncate rounding mode.
  4184   * The size is the size of the integer value (4 or 8)
  4185   */
  4186  #define x86_64_cvttsd2si_reg_reg_size(inst, dreg, sxreg, size) \
  4187  	do { \
  4188  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x2c, (dreg), (sxreg), (size)); \
  4189  	} while(0)
  4190  
  4191  #define x86_64_cvttsd2si_reg_regp_size(inst, dreg, sregp, size) \
  4192  	do { \
  4193  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x2c, (dreg), (sregp), (size)); \
  4194  	} while(0)
  4195  
  4196  #define x86_64_cvttsd2si_reg_mem_size(inst, dreg, mem, size) \
  4197  	do { \
  4198  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x2c, (dreg), (mem), (size)); \
  4199  	} while(0)
  4200  
  4201  #define x86_64_cvttsd2si_reg_membase_size(inst, dreg, basereg, disp, size) \
  4202  	do { \
  4203  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x2c, (dreg), (basereg), (disp), (size)); \
  4204  	} while(0)
  4205  
  4206  #define x86_64_cvttsd2si_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \
  4207  	do { \
  4208  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x2c, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \
  4209  	} while(0)
  4210  
  4211  /*
  4212   * cvtss2sd: Convert float32 to float64
  4213   */
  4214  #define x86_64_cvtss2sd_reg_reg(inst, dreg, sreg) \
  4215  	do { \
  4216  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5a, (dreg), (sreg), 0); \
  4217  	} while(0)
  4218  
  4219  #define x86_64_cvtss2sd_reg_regp(inst, dxreg, sregp) \
  4220  	do { \
  4221  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5a, (dxreg), (sregp), 0); \
  4222  	} while(0)
  4223  
  4224  #define x86_64_cvtss2sd_reg_mem(inst, dxreg, mem) \
  4225  	do { \
  4226  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5a, (dxreg), (mem), 0); \
  4227  	} while(0)
  4228  
  4229  #define x86_64_cvtss2sd_reg_membase(inst, dreg, basereg, disp) \
  4230  	do { \
  4231  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5a, (dreg), (basereg), (disp), 0); \
  4232  	} while(0)
  4233  
  4234  #define x86_64_cvtss2sd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4235  	do { \
  4236  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5a, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4237  	} while(0)
  4238  
  4239  /*
  4240   * cvtsd2ss: Convert float64 to float32
  4241   */
  4242  #define x86_64_cvtsd2ss_reg_reg(inst, dreg, sreg) \
  4243  	do { \
  4244  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5a, (dreg), (sreg), 0); \
  4245  	} while(0)
  4246  
  4247  #define x86_64_cvtsd2ss_reg_regp(inst, dxreg, sregp) \
  4248  	do { \
  4249  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5a, (dxreg), (sregp), 0); \
  4250  	} while(0)
  4251  
  4252  #define x86_64_cvtsd2ss_reg_mem(inst, dxreg, mem) \
  4253  	do { \
  4254  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5a, (dxreg), (mem), 0); \
  4255  	} while(0)
  4256  
  4257  #define x86_64_cvtsd2ss_reg_membase(inst, dreg, basereg, disp) \
  4258  	do { \
  4259  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5a, (dreg), (basereg), (disp), 0); \
  4260  	} while(0)
  4261  
  4262  #define x86_64_cvtsd2ss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4263  	do { \
  4264  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5a, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4265  	} while(0)
  4266  
  4267  /*
  4268   * Compare opcodes
  4269   */
  4270  
  4271  /*
  4272   * comiss: Compare ordered scalar single precision values
  4273   */
  4274  #define x86_64_comiss_reg_reg(inst, dreg, sreg) \
  4275  	do { \
  4276  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x2f, (dreg), (sreg)); \
  4277  	} while(0)
  4278  
  4279  #define x86_64_comiss_reg_regp(inst, dreg, sregp) \
  4280  	do { \
  4281  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x2f, (dreg), (sregp)); \
  4282  	} while(0)
  4283  
  4284  #define x86_64_comiss_reg_mem(inst, dreg, mem) \
  4285  	do { \
  4286  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x2f, (dreg), (mem)); \
  4287  	} while(0)
  4288  
  4289  #define x86_64_comiss_reg_membase(inst, dreg, basereg, disp) \
  4290  	do { \
  4291  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x2f, (dreg), (basereg), (disp)); \
  4292  	} while(0)
  4293  
  4294  #define x86_64_comiss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4295  	do { \
  4296  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x2f, (dreg), (basereg), (disp), (indexreg), (shift)); \
  4297  	} while(0)
  4298  
  4299  /*
  4300   * comisd: Compare ordered scalar double precision values
  4301   */
  4302  #define x86_64_comisd_reg_reg(inst, dreg, sreg) \
  4303  	do { \
  4304  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x2f, (dreg), (sreg), 0); \
  4305  	} while(0)
  4306  
  4307  #define x86_64_comisd_reg_regp(inst, dreg, sregp) \
  4308  	do { \
  4309  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x2f, (dreg), (sregp), 0); \
  4310  	} while(0)
  4311  
  4312  #define x86_64_comisd_reg_mem(inst, dreg, mem) \
  4313  	do { \
  4314  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x2f, (dreg), (mem), 0); \
  4315  	} while(0)
  4316  
  4317  #define x86_64_comisd_reg_membase(inst, dreg, basereg, disp) \
  4318  	do { \
  4319  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x2f, (dreg), (basereg), (disp), 0); \
  4320  	} while(0)
  4321  
  4322  #define x86_64_comisd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4323  	do { \
  4324  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x2f, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4325  	} while(0)
  4326  
  4327  /*
  4328   * ucomiss: Compare unordered scalar single precision values
  4329   */
  4330  #define x86_64_ucomiss_reg_reg(inst, dreg, sreg) \
  4331  	do { \
  4332  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x2e, (dreg), (sreg)); \
  4333  	} while(0)
  4334  
  4335  #define x86_64_ucomiss_reg_regp(inst, dreg, sregp) \
  4336  	do { \
  4337  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x2e, (dreg), (sregp)); \
  4338  	} while(0)
  4339  
  4340  #define x86_64_ucomiss_reg_mem(inst, dreg, mem) \
  4341  	do { \
  4342  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x2e, (dreg), (mem)); \
  4343  	} while(0)
  4344  
  4345  #define x86_64_ucomiss_reg_membase(inst, dreg, basereg, disp) \
  4346  	do { \
  4347  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x2e, (dreg), (basereg), (disp)); \
  4348  	} while(0)
  4349  
  4350  #define x86_64_ucomiss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4351  	do { \
  4352  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x2e, (dreg), (basereg), (disp), (indexreg), (shift)); \
  4353  	} while(0)
  4354  
  4355  /*
  4356   * ucomisd: Compare unordered scalar double precision values
  4357   */
  4358  #define x86_64_ucomisd_reg_reg(inst, dreg, sreg) \
  4359  	do { \
  4360  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x2e, (dreg), (sreg), 0); \
  4361  	} while(0)
  4362  
  4363  #define x86_64_ucomisd_reg_regp(inst, dreg, sregp) \
  4364  	do { \
  4365  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x2e, (dreg), (sregp), 0); \
  4366  	} while(0)
  4367  
  4368  #define x86_64_ucomisd_reg_mem(inst, dreg, mem) \
  4369  	do { \
  4370  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x2e, (dreg), (mem), 0); \
  4371  	} while(0)
  4372  
  4373  #define x86_64_ucomisd_reg_membase(inst, dreg, basereg, disp) \
  4374  	do { \
  4375  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x2e, (dreg), (basereg), (disp), 0); \
  4376  	} while(0)
  4377  
  4378  #define x86_64_ucomisd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4379  	do { \
  4380  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x2e, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4381  	} while(0)
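
         /*
          * Both comisd and ucomisd set ZF, PF and CF like an unsigned
          * integer compare, so the unsigned condition codes apply
          * afterwards.  A hedged sketch for xmm0 < xmm1:
          *
          *   x86_64_ucomisd_reg_reg(inst, X86_64_XMM0, X86_64_XMM1);
          *       emits: 66 0f 2e c1
          *   x86_64_set_reg(inst, X86_CC_LT, X86_64_RAX, 0);
          *       emits: 40 0f 92 c0   (setb %al)
          *
          * An unordered result (NaN) also sets CF, so NaN inputs satisfy
          * the "below" condition here.
          */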
  4382  
  4383  /*
  4384   * Arithmetic opcodes
  4385   */
  4386  
  4387  /*
  4388   * addss: Add scalar single precision float values
  4389   */
  4390  #define x86_64_addss_reg_reg(inst, dreg, sreg) \
  4391  	do { \
  4392  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x58, (dreg), (sreg), 0); \
  4393  	} while(0)
  4394  
  4395  #define x86_64_addss_reg_regp(inst, dreg, sregp) \
  4396  	do { \
  4397  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x58, (dreg), (sregp), 0); \
  4398  	} while(0)
  4399  
  4400  #define x86_64_addss_reg_mem(inst, dreg, mem) \
  4401  	do { \
  4402  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x58, (dreg), (mem), 0); \
  4403  	} while(0)
  4404  
  4405  #define x86_64_addss_reg_membase(inst, dreg, basereg, disp) \
  4406  	do { \
  4407  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x58, (dreg), (basereg), (disp), 0); \
  4408  	} while(0)
  4409  
  4410  #define x86_64_addss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4411  	do { \
  4412  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x58, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4413  	} while(0)
  4414  
  4415  /*
   4416   * subss: Subtract scalar single precision float values
  4417   */
  4418  #define x86_64_subss_reg_reg(inst, dreg, sreg) \
  4419  	do { \
  4420  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (sreg), 0); \
  4421  	} while(0)
  4422  
  4423  #define x86_64_subss_reg_regp(inst, dreg, sregp) \
  4424  	do { \
  4425  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (sregp), 0); \
  4426  	} while(0)
  4427  
  4428  #define x86_64_subss_reg_mem(inst, dreg, mem) \
  4429  	do { \
  4430  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (mem), 0); \
  4431  	} while(0)
  4432  
  4433  #define x86_64_subss_reg_membase(inst, dreg, basereg, disp) \
  4434  	do { \
  4435  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (basereg), (disp), 0); \
  4436  	} while(0)
  4437  
  4438  #define x86_64_subss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4439  	do { \
  4440  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4441  	} while(0)
  4442  
  4443  /*
  4444   * mulss: Multiply scalar single precision float values
  4445   */
  4446  #define x86_64_mulss_reg_reg(inst, dreg, sreg) \
  4447  	do { \
  4448  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x59, (dreg), (sreg), 0); \
  4449  	} while(0)
  4450  
  4451  #define x86_64_mulss_reg_regp(inst, dreg, sregp) \
  4452  	do { \
  4453  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x59, (dreg), (sregp), 0); \
  4454  	} while(0)
  4455  
  4456  #define x86_64_mulss_reg_mem(inst, dreg, mem) \
  4457  	do { \
  4458  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x59, (dreg), (mem), 0); \
  4459  	} while(0)
  4460  
  4461  #define x86_64_mulss_reg_membase(inst, dreg, basereg, disp) \
  4462  	do { \
  4463  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x59, (dreg), (basereg), (disp), 0); \
  4464  	} while(0)
  4465  
  4466  #define x86_64_mulss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4467  	do { \
  4468  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x59, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4469  	} while(0)
  4470  
  4471  /*
  4472   * divss: Divide scalar single precision float values
  4473   */
  4474  #define x86_64_divss_reg_reg(inst, dreg, sreg) \
  4475  	do { \
  4476  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (sreg), 0); \
  4477  	} while(0)
  4478  
  4479  #define x86_64_divss_reg_regp(inst, dreg, sregp) \
  4480  	do { \
  4481  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (sregp), 0); \
  4482  	} while(0)
  4483  
  4484  #define x86_64_divss_reg_mem(inst, dreg, mem) \
  4485  	do { \
  4486  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (mem), 0); \
  4487  	} while(0)
  4488  
  4489  #define x86_64_divss_reg_membase(inst, dreg, basereg, disp) \
  4490  	do { \
  4491  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (basereg), (disp), 0); \
  4492  	} while(0)
  4493  
  4494  #define x86_64_divss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4495  	do { \
  4496  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4497  	} while(0)
  4498  
  4499  /*
  4500   * Macros for the logical operations with packed single precision values.
  4501   */
  4502  #define x86_64_plops_reg_reg(inst, op, dreg, sreg) \
  4503  	do { \
  4504  		x86_64_xmm2_reg_reg((inst), 0x0f, (op), (dreg), (sreg)); \
  4505  	} while(0)
  4506  
  4507  #define x86_64_plops_reg_regp(inst, op, dreg, sregp) \
  4508  	do { \
  4509  		x86_64_xmm2_reg_regp((inst), 0x0f, (op), (dreg), (sregp)); \
  4510  	} while(0)
  4511  
  4512  #define x86_64_plops_reg_mem(inst, op, dreg, mem) \
  4513  	do { \
  4514  		x86_64_xmm2_reg_mem((inst), 0x0f, (op), (dreg), (mem)); \
  4515  	} while(0)
  4516  
  4517  #define x86_64_plops_reg_membase(inst, op, dreg, basereg, disp) \
  4518  	do { \
  4519  		x86_64_xmm2_reg_membase((inst), 0x0f, (op), (dreg), (basereg), (disp)); \
  4520  	} while(0)
  4521  
  4522  #define x86_64_plops_reg_memindex(inst, op, dreg, basereg, disp, indexreg, shift) \
  4523  	do { \
  4524  		x86_64_xmm2_reg_memindex((inst), 0x0f, (op), (dreg), (basereg), (disp), (indexreg), (shift)); \
  4525  	} while(0)
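
         /*
          * The op argument is the second opcode byte, so (illustrative)
          *
          *   x86_64_plops_reg_reg(inst, 0x54, X86_64_XMM0, X86_64_XMM1);
          *
          * is equivalent to the andps macro below and emits 0f 54 c1.
          */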
  4526  
  4527  /*
  4528   * andps: And
  4529   */
  4530  #define x86_64_andps_reg_reg(inst, dreg, sreg) \
  4531  	do { \
  4532  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x54, (dreg), (sreg)); \
  4533  	} while(0)
  4534  
  4535  #define x86_64_andps_reg_regp(inst, dreg, sregp) \
  4536  	do { \
  4537  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x54, (dreg), (sregp)); \
  4538  	} while(0)
  4539  
  4540  #define x86_64_andps_reg_mem(inst, dreg, mem) \
  4541  	do { \
  4542  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x54, (dreg), (mem)); \
  4543  	} while(0)
  4544  
  4545  #define x86_64_andps_reg_membase(inst, dreg, basereg, disp) \
  4546  	do { \
  4547  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x54, (dreg), (basereg), (disp)); \
  4548  	} while(0)
  4549  
  4550  #define x86_64_andps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4551  	do { \
  4552  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x54, (dreg), (basereg), (disp), (indexreg), (shift)); \
  4553  	} while(0)
  4554  
  4555  /*
  4556   * orps: Or
  4557   */
  4558  #define x86_64_orps_reg_reg(inst, dreg, sreg) \
  4559  	do { \
  4560  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x56, (dreg), (sreg)); \
  4561  	} while(0)
  4562  
  4563  #define x86_64_orps_reg_regp(inst, dreg, sregp) \
  4564  	do { \
  4565  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x56, (dreg), (sregp)); \
  4566  	} while(0)
  4567  
  4568  #define x86_64_orps_reg_mem(inst, dreg, mem) \
  4569  	do { \
  4570  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x56, (dreg), (mem)); \
  4571  	} while(0)
  4572  
  4573  #define x86_64_orps_reg_membase(inst, dreg, basereg, disp) \
  4574  	do { \
  4575  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x56, (dreg), (basereg), (disp)); \
  4576  	} while(0)
  4577  
  4578  #define x86_64_orps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4579  	do { \
  4580  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x56, (dreg), (basereg), (disp), (indexreg), (shift)); \
  4581  	} while(0)
  4582  
  4583  /*
  4584   * xorps: Xor
  4585   */
  4586  #define x86_64_xorps_reg_reg(inst, dreg, sreg) \
  4587  	do { \
  4588  		x86_64_xmm2_reg_reg((inst), 0x0f, 0x57, (dreg), (sreg)); \
  4589  	} while(0)
  4590  
  4591  #define x86_64_xorps_reg_regp(inst, dreg, sregp) \
  4592  	do { \
  4593  		x86_64_xmm2_reg_regp((inst), 0x0f, 0x57, (dreg), (sregp)); \
  4594  	} while(0)
  4595  
  4596  #define x86_64_xorps_reg_mem(inst, dreg, mem) \
  4597  	do { \
  4598  		x86_64_xmm2_reg_mem((inst), 0x0f, 0x57, (dreg), (mem)); \
  4599  	} while(0)
  4600  
  4601  #define x86_64_xorps_reg_membase(inst, dreg, basereg, disp) \
  4602  	do { \
  4603  		x86_64_xmm2_reg_membase((inst), 0x0f, 0x57, (dreg), (basereg), (disp)); \
  4604  	} while(0)
  4605  
  4606  #define x86_64_xorps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4607  	do { \
  4608  		x86_64_xmm2_reg_memindex((inst), 0x0f, 0x57, (dreg), (basereg), (disp), (indexreg), (shift)); \
  4609  	} while(0)
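
         /*
          * A common idiom (illustrative): xoring a register with itself
          * is the usual way to zero an xmm register.
          *
          *   x86_64_xorps_reg_reg(inst, X86_64_XMM0, X86_64_XMM0);
          *       emits: 0f 57 c0   (xorps %xmm0, %xmm0)
          */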
  4610  
  4611  /*
  4612   * maxss: Maximum value
  4613   */
  4614  #define x86_64_maxss_reg_reg(inst, dreg, sreg) \
  4615  	do { \
  4616  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5f, (dreg), (sreg), 0); \
  4617  	} while(0)
  4618  
  4619  #define x86_64_maxss_reg_regp(inst, dreg, sregp) \
  4620  	do { \
  4621  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5f, (dreg), (sregp), 0); \
  4622  	} while(0)
  4623  
  4624  #define x86_64_maxss_reg_mem(inst, dreg, mem) \
  4625  	do { \
  4626  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5f, (dreg), (mem), 0); \
  4627  	} while(0)
  4628  
  4629  #define x86_64_maxss_reg_membase(inst, dreg, basereg, disp) \
  4630  	do { \
  4631  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5f, (dreg), (basereg), (disp), 0); \
  4632  	} while(0)
  4633  
  4634  #define x86_64_maxss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4635  	do { \
  4636  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5f, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4637  	} while(0)
  4638  
  4639  /*
  4640   * minss: Minimum value
  4641   */
  4642  #define x86_64_minss_reg_reg(inst, dreg, sreg) \
  4643  	do { \
  4644  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5d, (dreg), (sreg), 0); \
  4645  	} while(0)
  4646  
  4647  #define x86_64_minss_reg_regp(inst, dreg, sregp) \
  4648  	do { \
  4649  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5d, (dreg), (sregp), 0); \
  4650  	} while(0)
  4651  
  4652  #define x86_64_minss_reg_mem(inst, dreg, mem) \
  4653  	do { \
  4654  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5d, (dreg), (mem), 0); \
  4655  	} while(0)
  4656  
  4657  #define x86_64_minss_reg_membase(inst, dreg, basereg, disp) \
  4658  	do { \
  4659  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5d, (dreg), (basereg), (disp), 0); \
  4660  	} while(0)
  4661  
  4662  #define x86_64_minss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4663  	do { \
  4664  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5d, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4665  	} while(0)
  4666  
  4667  /*
  4668   * sqrtss: Square root
  4669   */
  4670  #define x86_64_sqrtss_reg_reg(inst, dreg, sreg) \
  4671  	do { \
  4672  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x51, (dreg), (sreg), 0); \
  4673  	} while(0)
  4674  
  4675  #define x86_64_sqrtss_reg_regp(inst, dreg, sregp) \
  4676  	do { \
  4677  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x51, (dreg), (sregp), 0); \
  4678  	} while(0)
  4679  
  4680  #define x86_64_sqrtss_reg_mem(inst, dreg, mem) \
  4681  	do { \
  4682  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x51, (dreg), (mem), 0); \
  4683  	} while(0)
  4684  
  4685  #define x86_64_sqrtss_reg_membase(inst, dreg, basereg, disp) \
  4686  	do { \
  4687  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x51, (dreg), (basereg), (disp), 0); \
  4688  	} while(0)
  4689  
  4690  #define x86_64_sqrtss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4691  	do { \
  4692  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x51, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4693  	} while(0)
  4694  
  4695  
  4696  /*
  4697   * Macros for the logical operations with packed double precision values.
  4698   */
  4699  #define x86_64_plopd_reg_reg(inst, op, dreg, sreg) \
  4700  	do { \
  4701  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, (op), (dreg), (sreg), 0); \
  4702  	} while(0)
  4703  
  4704  #define x86_64_plopd_reg_regp(inst, op, dreg, sregp) \
  4705  	do { \
  4706  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, (op), (dreg), (sregp), 0); \
  4707  	} while(0)
  4708  
  4709  #define x86_64_plopd_reg_mem(inst, op, dreg, mem) \
  4710  	do { \
  4711  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, (op), (dreg), (mem), 0); \
  4712  	} while(0)
  4713  
  4714  #define x86_64_plopd_reg_membase(inst, op, dreg, basereg, disp) \
  4715  	do { \
  4716  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, (op), (dreg), (basereg), (disp), 0); \
  4717  	} while(0)
  4718  
  4719  #define x86_64_plopd_reg_memindex(inst, op, dreg, basereg, disp, indexreg, shift) \
  4720  	do { \
   4721  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, (op), (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4722  	} while(0)
  4723  
  4724  /*
  4725   * addsd: Add scalar double precision float values
  4726   */
  4727  #define x86_64_addsd_reg_reg(inst, dreg, sreg) \
  4728  	do { \
  4729  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x58, (dreg), (sreg), 0); \
  4730  	} while(0)
  4731  
  4732  #define x86_64_addsd_reg_regp(inst, dreg, sregp) \
  4733  	do { \
  4734  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x58, (dreg), (sregp), 0); \
  4735  	} while(0)
  4736  
  4737  #define x86_64_addsd_reg_mem(inst, dreg, mem) \
  4738  	do { \
  4739  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x58, (dreg), (mem), 0); \
  4740  	} while(0)
  4741  
  4742  #define x86_64_addsd_reg_membase(inst, dreg, basereg, disp) \
  4743  	do { \
  4744  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x58, (dreg), (basereg), (disp), 0); \
  4745  	} while(0)
  4746  
  4747  #define x86_64_addsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4748  	do { \
  4749  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x58, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4750  	} while(0)
  4751  
  4752  /*
   4753   * subsd: Subtract scalar double precision float values
  4754   */
  4755  #define x86_64_subsd_reg_reg(inst, dreg, sreg) \
  4756  	do { \
  4757  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (sreg), 0); \
  4758  	} while(0)
  4759  
  4760  #define x86_64_subsd_reg_regp(inst, dreg, sregp) \
  4761  	do { \
  4762  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (sregp), 0); \
  4763  	} while(0)
  4764  
  4765  #define x86_64_subsd_reg_mem(inst, dreg, mem) \
  4766  	do { \
  4767  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (mem), 0); \
  4768  	} while(0)
  4769  
  4770  #define x86_64_subsd_reg_membase(inst, dreg, basereg, disp) \
  4771  	do { \
  4772  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (basereg), (disp), 0); \
  4773  	} while(0)
  4774  
  4775  #define x86_64_subsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4776  	do { \
  4777  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4778  	} while(0)
  4779  
  4780  /*
  4781   * mulsd: Multiply scalar double precision float values
  4782   */
  4783  #define x86_64_mulsd_reg_reg(inst, dreg, sreg) \
  4784  	do { \
  4785  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x59, (dreg), (sreg), 0); \
  4786  	} while(0)
  4787  
  4788  #define x86_64_mulsd_reg_regp(inst, dreg, sregp) \
  4789  	do { \
  4790  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x59, (dreg), (sregp), 0); \
  4791  	} while(0)
  4792  
  4793  #define x86_64_mulsd_reg_mem(inst, dreg, mem) \
  4794  	do { \
  4795  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x59, (dreg), (mem), 0); \
  4796  	} while(0)
  4797  
  4798  #define x86_64_mulsd_reg_membase(inst, dreg, basereg, disp) \
  4799  	do { \
  4800  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x59, (dreg), (basereg), (disp), 0); \
  4801  	} while(0)
  4802  
  4803  #define x86_64_mulsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4804  	do { \
  4805  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x59, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4806  	} while(0)
  4807  
  4808  /*
  4809   * divsd: Divide scalar double precision float values
  4810   */
  4811  #define x86_64_divsd_reg_reg(inst, dreg, sreg) \
  4812  	do { \
  4813  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (sreg), 0); \
  4814  	} while(0)
  4815  
  4816  #define x86_64_divsd_reg_regp(inst, dreg, sregp) \
  4817  	do { \
  4818  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (sregp), 0); \
  4819  	} while(0)
  4820  
  4821  #define x86_64_divsd_reg_mem(inst, dreg, mem) \
  4822  	do { \
  4823  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (mem), 0); \
  4824  	} while(0)
  4825  
  4826  #define x86_64_divsd_reg_membase(inst, dreg, basereg, disp) \
  4827  	do { \
  4828  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (basereg), (disp), 0); \
  4829  	} while(0)
  4830  
  4831  #define x86_64_divsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4832  	do { \
  4833  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4834  	} while(0)
  4835  
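         /*
          * Usage sketch (illustrative): the extended registers XMM8-XMM15
          * make the macros emit a REX prefix between the mandatory prefix
          * and the 0x0f escape byte, so
          *
          *     x86_64_addsd_reg_reg(inst, X86_64_XMM8, X86_64_XMM1);
          *
          * should emit F2 44 0F 58 C1 (addsd %xmm1, %xmm8).
          */
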
  4836  /*
   4837   * andpd: Bitwise AND of packed double precision float values
  4838   */
  4839  #define x86_64_andpd_reg_reg(inst, dreg, sreg) \
  4840  	do { \
  4841  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x54, (dreg), (sreg), 0); \
  4842  	} while(0)
  4843  
  4844  #define x86_64_andpd_reg_regp(inst, dreg, sregp) \
  4845  	do { \
  4846  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x54, (dreg), (sregp), 0); \
  4847  	} while(0)
  4848  
  4849  #define x86_64_andpd_reg_mem(inst, dreg, mem) \
  4850  	do { \
  4851  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x54, (dreg), (mem), 0); \
  4852  	} while(0)
  4853  
  4854  #define x86_64_andpd_reg_membase(inst, dreg, basereg, disp) \
  4855  	do { \
  4856  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x54, (dreg), (basereg), (disp), 0); \
  4857  	} while(0)
  4858  
  4859  #define x86_64_andpd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4860  	do { \
  4861  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x54, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4862  	} while(0)
  4863  
  4864  /*
   4865   * orpd: Bitwise OR of packed double precision float values
  4866   */
  4867  #define x86_64_orpd_reg_reg(inst, dreg, sreg) \
  4868  	do { \
  4869  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x56, (dreg), (sreg), 0); \
  4870  	} while(0)
  4871  
  4872  #define x86_64_orpd_reg_regp(inst, dreg, sregp) \
  4873  	do { \
  4874  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x56, (dreg), (sregp), 0); \
  4875  	} while(0)
  4876  
  4877  #define x86_64_orpd_reg_mem(inst, dreg, mem) \
  4878  	do { \
  4879  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x56, (dreg), (mem), 0); \
  4880  	} while(0)
  4881  
  4882  #define x86_64_orpd_reg_membase(inst, dreg, basereg, disp) \
  4883  	do { \
  4884  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x56, (dreg), (basereg), (disp), 0); \
  4885  	} while(0)
  4886  
  4887  #define x86_64_orpd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4888  	do { \
  4889  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x56, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4890  	} while(0)
  4891  
  4892  /*
   4893   * xorpd: Bitwise XOR of packed double precision float values
  4894   */
  4895  #define x86_64_xorpd_reg_reg(inst, dreg, sreg) \
  4896  	do { \
  4897  		x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x57, (dreg), (sreg), 0); \
  4898  	} while(0)
  4899  
  4900  #define x86_64_xorpd_reg_regp(inst, dreg, sregp) \
  4901  	do { \
  4902  		x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x57, (dreg), (sregp), 0); \
  4903  	} while(0)
  4904  
  4905  #define x86_64_xorpd_reg_mem(inst, dreg, mem) \
  4906  	do { \
  4907  		x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x57, (dreg), (mem), 0); \
  4908  	} while(0)
  4909  
  4910  #define x86_64_xorpd_reg_membase(inst, dreg, basereg, disp) \
  4911  	do { \
  4912  		x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x57, (dreg), (basereg), (disp), 0); \
  4913  	} while(0)
  4914  
  4915  #define x86_64_xorpd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4916  	do { \
  4917  		x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x57, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4918  	} while(0)
  4919  
  4920  /*
  4921   * maxsd: Maximum value
  4922   */
  4923  #define x86_64_maxsd_reg_reg(inst, dreg, sreg) \
  4924  	do { \
  4925  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5f, (dreg), (sreg), 0); \
  4926  	} while(0)
  4927  
  4928  #define x86_64_maxsd_reg_regp(inst, dreg, sregp) \
  4929  	do { \
  4930  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5f, (dreg), (sregp), 0); \
  4931  	} while(0)
  4932  
  4933  #define x86_64_maxsd_reg_mem(inst, dreg, mem) \
  4934  	do { \
  4935  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5f, (dreg), (mem), 0); \
  4936  	} while(0)
  4937  
  4938  #define x86_64_maxsd_reg_membase(inst, dreg, basereg, disp) \
  4939  	do { \
  4940  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5f, (dreg), (basereg), (disp), 0); \
  4941  	} while(0)
  4942  
  4943  #define x86_64_maxsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4944  	do { \
  4945  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5f, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4946  	} while(0)
  4947  
  4948  /*
  4949   * minsd: Minimum value
  4950   */
  4951  #define x86_64_minsd_reg_reg(inst, dreg, sreg) \
  4952  	do { \
  4953  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5d, (dreg), (sreg), 0); \
  4954  	} while(0)
  4955  
  4956  #define x86_64_minsd_reg_regp(inst, dreg, sregp) \
  4957  	do { \
  4958  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5d, (dreg), (sregp), 0); \
  4959  	} while(0)
  4960  
  4961  #define x86_64_minsd_reg_mem(inst, dreg, mem) \
  4962  	do { \
  4963  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5d, (dreg), (mem), 0); \
  4964  	} while(0)
  4965  
  4966  #define x86_64_minsd_reg_membase(inst, dreg, basereg, disp) \
  4967  	do { \
  4968  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5d, (dreg), (basereg), (disp), 0); \
  4969  	} while(0)
  4970  
  4971  #define x86_64_minsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  4972  	do { \
   4973  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5d, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  4974  	} while(0)
  4975  
  4976  /*
   4977   * sqrtsd: Square root of a scalar double precision float value
  4978   */
  4979  #define x86_64_sqrtsd_reg_reg(inst, dreg, sreg) \
  4980  	do { \
  4981  		x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x51, (dreg), (sreg), 0); \
  4982  	} while(0)
  4983  
  4984  #define x86_64_sqrtsd_reg_regp(inst, dreg, sregp) \
  4985  	do { \
  4986  		x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x51, (dreg), (sregp), 0); \
  4987  	} while(0)
  4988  
  4989  #define x86_64_sqrtsd_reg_mem(inst, dreg, mem) \
  4990  	do { \
  4991  		x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x51, (dreg), (mem), 0); \
  4992  	} while(0)
  4993  
  4994  #define x86_64_sqrtsd_reg_membase(inst, dreg, basereg, disp) \
  4995  	do { \
  4996  		x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x51, (dreg), (basereg), (disp), 0); \
  4997  	} while(0)
  4998  
  4999  #define x86_64_sqrtsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
  5000  	do { \
  5001  		x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x51, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  5002  	} while(0)
  5003  
  5004  /*
  5005   * Rounding: Available in SSE 4.1 only
  5006   */
  5007  
  5008  /*
  5009   * roundss: Round scalar single precision value
  5010   */
  5011  #define x86_64_roundss_reg_reg(inst, dreg, sreg, mode) \
  5012  	do { \
  5013  		x86_64_p1_xmm3_reg_reg_size((inst), 0x66, 0x0f, 0x3a, 0x0a, (dreg), (sreg), 0); \
  5014  		x86_imm_emit8((inst), (mode)); \
  5015  	} while(0)
  5016  
  5017  #define x86_64_roundss_reg_regp(inst, dreg, sregp, mode) \
  5018  	do { \
  5019  		x86_64_p1_xmm3_reg_regp_size((inst), 0x66, 0x0f, 0x3a, 0x0a, (dreg), (sregp), 0); \
  5020  		x86_imm_emit8((inst), (mode)); \
  5021  	} while(0)
  5022  
  5023  #define x86_64_roundss_reg_mem(inst, dreg, mem, mode) \
  5024  	do { \
  5025  		x86_64_p1_xmm3_reg_mem_size((inst), 0x66, 0x0f, 0x3a, 0x0a, (dreg), (mem), 0); \
  5026  		x86_imm_emit8((inst), (mode)); \
  5027  	} while(0)
  5028  
  5029  #define x86_64_roundss_reg_membase(inst, dreg, basereg, disp, mode) \
  5030  	do { \
  5031  		x86_64_p1_xmm3_reg_membase_size((inst), 0x66, 0x0f, 0x3a, 0x0a, (dreg), (basereg), (disp), 0); \
  5032  		x86_imm_emit8((inst), (mode)); \
  5033  	} while(0)
  5034  
  5035  #define x86_64_roundss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift, mode) \
  5036  	do { \
  5037  		x86_64_p1_xmm3_reg_memindex_size((inst), 0x66, 0x0f, 0x3a, 0x0a, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  5038  		x86_imm_emit8((inst), (mode)); \
  5039  	} while(0)
  5040  
  5041  /*
  5042   * roundsd: Round scalar double precision value
  5043   */
  5044  #define x86_64_roundsd_reg_reg(inst, dreg, sreg, mode) \
  5045  	do { \
  5046  		x86_64_p1_xmm3_reg_reg_size((inst), 0x66, 0x0f, 0x3a, 0x0b, (dreg), (sreg), 0); \
  5047  		x86_imm_emit8((inst), (mode)); \
  5048  	} while(0)
  5049  
  5050  #define x86_64_roundsd_reg_regp(inst, dreg, sregp, mode) \
  5051  	do { \
  5052  		x86_64_p1_xmm3_reg_regp_size((inst), 0x66, 0x0f, 0x3a, 0x0b, (dreg), (sregp), 0); \
  5053  		x86_imm_emit8((inst), (mode)); \
  5054  	} while(0)
  5055  
  5056  #define x86_64_roundsd_reg_mem(inst, dreg, mem, mode) \
  5057  	do { \
  5058  		x86_64_p1_xmm3_reg_mem_size((inst), 0x66, 0x0f, 0x3a, 0x0b, (dreg), (mem), 0); \
  5059  		x86_imm_emit8((inst), (mode)); \
  5060  	} while(0)
  5061  
  5062  #define x86_64_roundsd_reg_membase(inst, dreg, basereg, disp, mode) \
  5063  	do { \
  5064  		x86_64_p1_xmm3_reg_membase_size((inst), 0x66, 0x0f, 0x3a, 0x0b, (dreg), (basereg), (disp), 0); \
  5065  		x86_imm_emit8((inst), (mode)); \
  5066  	} while(0)
  5067  
  5068  #define x86_64_roundsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift, mode) \
  5069  	do { \
  5070  		x86_64_p1_xmm3_reg_memindex_size((inst), 0x66, 0x0f, 0x3a, 0x0b, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
  5071  		x86_imm_emit8((inst), (mode)); \
  5072  	} while(0)
  5073  
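         /*
          * Usage sketch (illustrative): the mode immediate selects the
          * rounding behavior (0 = nearest, 1 = down, 2 = up, 3 = truncate;
          * setting bit 2 uses the current rounding mode from MXCSR instead),
          * so
          *
          *     x86_64_roundsd_reg_reg(inst, X86_64_XMM0, X86_64_XMM1, 3);
          *
          * should emit 66 0F 3A 0B C1 03, truncating the double in XMM1
          * into XMM0.
          */
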
  5074  /*
   5075   * Clear xmm register (xorps is one byte shorter than xorpd)
  5076   */
  5077  #define x86_64_clear_xreg(inst, reg) \
  5078  	do { \
  5079  		x86_64_xorps_reg_reg((inst), (reg), (reg)); \
  5080  	} while(0)
  5081  
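         /*
          * Usage sketch (illustrative):
          *
          *     x86_64_clear_xreg(inst, X86_64_XMM0);
          *
          * should emit 0F 57 C0 (xorps %xmm0, %xmm0).
          */
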
  5082  /*
  5083   * fpu instructions
  5084   */
  5085  
  5086  /*
  5087   * fld
  5088   */
  5089  
  5090  #define x86_64_fld_regp_size(inst, sregp, size) \
  5091  	do { \
  5092  		x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \
  5093  		switch(size) \
  5094  		{ \
  5095  			case 4: \
  5096  			{ \
  5097  			    *(inst)++ = (unsigned char)0xd9; \
  5098  				x86_64_regp_emit((inst), 0, (sregp)); \
  5099  			} \
  5100  			break; \
  5101  			case 8: \
  5102  			{ \
  5103  			    *(inst)++ = (unsigned char)0xdd; \
  5104  				x86_64_regp_emit((inst), 0, (sregp)); \
  5105  			} \
  5106  			break; \
  5107  			case 10: \
  5108  			{ \
  5109  			    *(inst)++ = (unsigned char)0xdb; \
  5110  				x86_64_regp_emit((inst), 5, (sregp)); \
  5111  			} \
  5112  			break; \
  5113  		} \
  5114  	} while(0)
  5115  
  5116  #define x86_64_fld_mem_size(inst, mem, size) \
  5117  	do { \
  5118  		switch(size) \
  5119  		{ \
  5120  			case 4: \
  5121  			{ \
  5122  			    *(inst)++ = (unsigned char)0xd9; \
  5123  				x86_64_mem_emit((inst), 0, (mem)); \
  5124  			} \
  5125  			break; \
  5126  			case 8: \
  5127  			{ \
  5128  			    *(inst)++ = (unsigned char)0xdd; \
  5129  				x86_64_mem_emit((inst), 0, (mem)); \
  5130  			} \
  5131  			break; \
  5132  			case 10: \
  5133  			{ \
  5134  			    *(inst)++ = (unsigned char)0xdb; \
  5135  				x86_64_mem_emit((inst), 5, (mem)); \
  5136  			} \
  5137  			break; \
  5138  		} \
  5139  	} while(0)
  5140  
  5141  #define x86_64_fld_membase_size(inst, basereg, disp, size) \
  5142  	do { \
  5143  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
  5144  		switch(size) \
  5145  		{ \
  5146  			case 4: \
  5147  			{ \
  5148  			    *(inst)++ = (unsigned char)0xd9; \
  5149  				x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  5150  			} \
  5151  			break; \
  5152  			case 8: \
  5153  			{ \
  5154  			    *(inst)++ = (unsigned char)0xdd; \
  5155  				x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  5156  			} \
  5157  			break; \
  5158  			case 10: \
  5159  			{ \
  5160  			    *(inst)++ = (unsigned char)0xdb; \
  5161  				x86_64_membase_emit((inst), 5, (basereg), (disp)); \
  5162  			} \
  5163  			break; \
  5164  		} \
  5165  	} while(0)
  5166  
  5167  #define x86_64_fld_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  5168  	do { \
  5169  		x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \
  5170  		switch(size) \
  5171  		{ \
  5172  			case 4: \
  5173  			{ \
  5174  			    *(inst)++ = (unsigned char)0xd9; \
  5175  				x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \
  5176  			} \
  5177  			break; \
  5178  			case 8: \
  5179  			{ \
  5180  			    *(inst)++ = (unsigned char)0xdd; \
  5181  				x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \
  5182  			} \
  5183  			break; \
  5184  			case 10: \
  5185  			{ \
  5186  			    *(inst)++ = (unsigned char)0xdb; \
  5187  				x86_64_memindex_emit((inst), 5, (basereg), (disp), (indexreg), (shift)); \
  5188  			} \
  5189  			break; \
  5190  		} \
  5191  	} while(0)
  5192  
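         /*
          * Usage sketch (illustrative): the size argument selects the
          * operand width (4 = float, 8 = double, 10 = 80-bit long double),
          * so
          *
          *     x86_64_fld_membase_size(inst, X86_64_RSP, 8, 8);
          *
          * should emit DD 44 24 08 (fldl 8(%rsp)).
          */
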
  5193  /*
  5194   * fild: Load an integer and convert it to long double
  5195   */
  5196  #define x86_64_fild_mem_size(inst, mem, size) \
  5197  	do { \
  5198  		switch(size) \
  5199  		{ \
  5200  			case 2: \
  5201  			{ \
  5202  			    *(inst)++ = (unsigned char)0xdf; \
  5203  				x86_64_mem_emit((inst), 0, (mem));	\
  5204  			} \
  5205  			break; \
  5206  			case 4: \
  5207  			{ \
  5208  			    *(inst)++ = (unsigned char)0xdb; \
  5209  				x86_64_mem_emit((inst), 0, (mem));	\
  5210  			} \
  5211  			break; \
  5212  			case 8: \
  5213  			{ \
  5214  			    *(inst)++ = (unsigned char)0xdf; \
  5215  				x86_64_mem_emit((inst), 5, (mem));	\
  5216  			} \
  5217  			break; \
  5218  		} \
   5219  	} while(0)
  5220  
  5221  #define x86_64_fild_membase_size(inst, basereg, disp, size) \
  5222  	do { \
  5223  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
  5224  		switch(size) \
  5225  		{ \
  5226  			case 2: \
  5227  			{ \
  5228  			    *(inst)++ = (unsigned char)0xdf; \
  5229  				x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  5230  			} \
  5231  			break; \
  5232  			case 4: \
  5233  			{ \
  5234  			    *(inst)++ = (unsigned char)0xdb; \
  5235  				x86_64_membase_emit((inst), 0, (basereg), (disp)); \
  5236  			} \
  5237  			break; \
  5238  			case 8: \
  5239  			{ \
  5240  			    *(inst)++ = (unsigned char)0xdf; \
  5241  				x86_64_membase_emit((inst), 5, (basereg), (disp)); \
  5242  			} \
  5243  			break; \
  5244  		} \
   5245  	} while(0)
  5246  
  5247  /*
   5248   * fst: Store st(0) to memory without popping (only float32 and float64 allowed)
  5249   */
  5250  
  5251  #define x86_64_fst_regp_size(inst, sregp, size) \
  5252  	do { \
  5253  		x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \
  5254  		switch(size) \
  5255  		{ \
  5256  			case 4: \
  5257  			{ \
  5258  			    *(inst)++ = (unsigned char)0xd9; \
  5259  				x86_64_regp_emit((inst), 2, (sregp)); \
  5260  			} \
  5261  			break; \
  5262  			case 8: \
  5263  			{ \
  5264  			    *(inst)++ = (unsigned char)0xdd; \
  5265  				x86_64_regp_emit((inst), 2, (sregp)); \
  5266  			} \
  5267  			break; \
  5268  		} \
  5269  	} while(0)
  5270  
  5271  #define x86_64_fst_mem_size(inst, mem, size) \
  5272  	do { \
  5273  		switch(size) \
  5274  		{ \
  5275  			case 4: \
  5276  			{ \
  5277  			    *(inst)++ = (unsigned char)0xd9; \
  5278  				x86_64_mem_emit((inst), 2, (mem)); \
  5279  			} \
  5280  			break; \
  5281  			case 8: \
  5282  			{ \
  5283  			    *(inst)++ = (unsigned char)0xdd; \
  5284  				x86_64_mem_emit((inst), 2, (mem)); \
  5285  			} \
  5286  			break; \
  5287  		} \
  5288  	} while(0)
  5289  
  5290  #define x86_64_fst_membase_size(inst, basereg, disp, size) \
  5291  	do { \
  5292  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
  5293  		switch(size) \
  5294  		{ \
  5295  			case 4: \
  5296  			{ \
  5297  			    *(inst)++ = (unsigned char)0xd9; \
  5298  				x86_64_membase_emit((inst), 2, (basereg), (disp)); \
  5299  			} \
  5300  			break; \
  5301  			case 8: \
  5302  			{ \
  5303  			    *(inst)++ = (unsigned char)0xdd; \
  5304  				x86_64_membase_emit((inst), 2, (basereg), (disp)); \
  5305  			} \
  5306  			break; \
  5307  		} \
  5308  	} while(0)
  5309  
  5310  #define x86_64_fst_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  5311  	do { \
  5312  		x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \
  5313  		switch(size) \
  5314  		{ \
  5315  			case 4: \
  5316  			{ \
  5317  			    *(inst)++ = (unsigned char)0xd9; \
  5318  				x86_64_memindex_emit((inst), 2, (basereg), (disp), (indexreg), (shift)); \
  5319  			} \
  5320  			break; \
  5321  			case 8: \
  5322  			{ \
  5323  			    *(inst)++ = (unsigned char)0xdd; \
  5324  				x86_64_memindex_emit((inst), 2, (basereg), (disp), (indexreg), (shift)); \
  5325  			} \
  5326  			break; \
  5327  		} \
  5328  	} while(0)
  5329  
  5330  /*
   5331   * fstp: Store st(0) to memory and pop it from the fpu stack
  5332   */
  5333  #define x86_64_fstp_regp_size(inst, sregp, size) \
  5334  	do { \
  5335  		x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \
  5336  		switch(size) \
  5337  		{ \
  5338  			case 4: \
  5339  			{ \
  5340  			    *(inst)++ = (unsigned char)0xd9; \
  5341  				x86_64_regp_emit((inst), 3, (sregp)); \
  5342  			} \
  5343  			break; \
  5344  			case 8: \
  5345  			{ \
  5346  			    *(inst)++ = (unsigned char)0xdd; \
  5347  				x86_64_regp_emit((inst), 3, (sregp)); \
  5348  			} \
  5349  			break; \
  5350  			case 10: \
  5351  			{ \
  5352  			    *(inst)++ = (unsigned char)0xdb; \
  5353  				x86_64_regp_emit((inst), 7, (sregp)); \
  5354  			} \
  5355  			break; \
  5356  		} \
  5357  	} while(0)
  5358  
  5359  #define x86_64_fstp_mem_size(inst, mem, size) \
  5360  	do { \
  5361  		switch(size) \
  5362  		{ \
  5363  			case 4: \
  5364  			{ \
  5365  			    *(inst)++ = (unsigned char)0xd9; \
  5366  				x86_64_mem_emit((inst), 3, (mem)); \
  5367  			} \
  5368  			break; \
  5369  			case 8: \
  5370  			{ \
  5371  			    *(inst)++ = (unsigned char)0xdd; \
  5372  				x86_64_mem_emit((inst), 3, (mem)); \
  5373  			} \
  5374  			break; \
  5375  			case 10: \
  5376  			{ \
  5377  			    *(inst)++ = (unsigned char)0xdb; \
  5378  				x86_64_mem_emit((inst), 7, (mem)); \
  5379  			} \
  5380  			break; \
  5381  		} \
  5382  	} while(0)
  5383  
  5384  #define x86_64_fstp_membase_size(inst, basereg, disp, size) \
  5385  	do { \
  5386  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
  5387  		switch(size) \
  5388  		{ \
  5389  			case 4: \
  5390  			{ \
  5391  			    *(inst)++ = (unsigned char)0xd9; \
  5392  				x86_64_membase_emit((inst), 3, (basereg), (disp)); \
  5393  			} \
  5394  			break; \
  5395  			case 8: \
  5396  			{ \
  5397  			    *(inst)++ = (unsigned char)0xdd; \
  5398  				x86_64_membase_emit((inst), 3, (basereg), (disp)); \
  5399  			} \
  5400  			break; \
  5401  			case 10: \
  5402  			{ \
  5403  			    *(inst)++ = (unsigned char)0xdb; \
  5404  				x86_64_membase_emit((inst), 7, (basereg), (disp)); \
  5405  			} \
  5406  			break; \
  5407  		} \
  5408  	} while(0)
  5409  
  5410  #define x86_64_fstp_memindex_size(inst, basereg, disp, indexreg, shift, size) \
  5411  	do { \
  5412  		x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \
  5413  		switch(size) \
  5414  		{ \
  5415  			case 4: \
  5416  			{ \
  5417  			    *(inst)++ = (unsigned char)0xd9; \
  5418  				x86_64_memindex_emit((inst), 3, (basereg), (disp), (indexreg), (shift)); \
  5419  			} \
  5420  			break; \
  5421  			case 8: \
  5422  			{ \
  5423  			    *(inst)++ = (unsigned char)0xdd; \
  5424  				x86_64_memindex_emit((inst), 3, (basereg), (disp), (indexreg), (shift)); \
  5425  			} \
  5426  			break; \
  5427  			case 10: \
  5428  			{ \
  5429  			    *(inst)++ = (unsigned char)0xdb; \
  5430  				x86_64_memindex_emit((inst), 7, (basereg), (disp), (indexreg), (shift)); \
  5431  			} \
  5432  			break; \
  5433  		} \
  5434  	} while(0)
  5435  
  5436  /*
   5437   * fistp: Convert st(0) to integer, store it to memory and pop the fpu stack
  5438   */
  5439  #define x86_64_fistp_mem_size(inst, mem, size) \
  5440  	do { \
  5441  		switch((size)) \
  5442  		{ \
  5443  			case 2: \
  5444  			{ \
  5445  				*(inst)++ = (unsigned char)0xdf; \
  5446  				x86_64_mem_emit((inst), 3, (mem)); \
  5447  			} \
  5448  			break; \
  5449  			case 4: \
  5450  			{ \
  5451  				*(inst)++ = (unsigned char)0xdb; \
  5452  				x86_64_mem_emit((inst), 3, (mem)); \
  5453  			} \
  5454  			break; \
  5455  			case 8: \
  5456  			{ \
  5457  				*(inst)++ = (unsigned char)0xdf; \
  5458  				x86_64_mem_emit((inst), 7, (mem)); \
  5459  			} \
  5460  			break; \
  5461  		} \
  5462  	} while(0)
  5463  
  5464  #define x86_64_fistp_regp_size(inst, dregp, size) \
  5465  	do { \
  5466  		x86_64_rex_emit((inst), 0, 0, 0, (dregp)); \
  5467  		switch((size)) \
  5468  		{ \
  5469  			case 2: \
  5470  			{ \
  5471  				*(inst)++ = (unsigned char)0xdf; \
  5472  				x86_64_regp_emit((inst), 3, (dregp)); \
  5473  			} \
  5474  			break; \
  5475  			case 4: \
  5476  			{ \
  5477  				*(inst)++ = (unsigned char)0xdb; \
  5478  				x86_64_regp_emit((inst), 3, (dregp)); \
  5479  			} \
  5480  			break; \
  5481  			case 8: \
  5482  			{ \
  5483  				*(inst)++ = (unsigned char)0xdf; \
  5484  				x86_64_regp_emit((inst), 7, (dregp)); \
  5485  			} \
  5486  			break; \
  5487  		} \
  5488  	} while(0)
  5489  
  5490  #define x86_64_fistp_membase_size(inst, basereg, disp, size) \
  5491  	do { \
  5492  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
  5493  		switch((size)) \
  5494  		{ \
  5495  			case 2: \
  5496  			{ \
  5497  				*(inst)++ = (unsigned char)0xdf; \
  5498  				x86_64_membase_emit((inst), 3, (basereg), (disp)); \
  5499  			} \
  5500  			break; \
  5501  			case 4: \
  5502  			{ \
  5503  				*(inst)++ = (unsigned char)0xdb; \
  5504  				x86_64_membase_emit((inst), 3, (basereg), (disp)); \
  5505  			} \
  5506  			break; \
  5507  			case 8: \
  5508  			{ \
  5509  				*(inst)++ = (unsigned char)0xdf; \
  5510  				x86_64_membase_emit((inst), 7, (basereg), (disp)); \
  5511  			} \
  5512  			break; \
  5513  		} \
  5514  	} while(0)
  5515  
  5516  /*
  5517   * frndint: Round st(0) to integer according to the rounding mode set in the fpu control word.
  5518   */
  5519  #define x86_64_frndint(inst) \
  5520  	do { \
  5521  		*(inst)++ = (unsigned char)0xd9; \
  5522  		*(inst)++ = (unsigned char)0xfc; \
  5523  	} while(0)
  5524  
  5525  /*
   5526   * fisttp: Convert st(0) to integer using truncation as the rounding mode, store it and pop the fpu stack. Available in SSE 3 only.
  5527   */
  5528  #define x86_64_fisttp_regp_size(inst, dregp, size) \
  5529  	do { \
  5530  		x86_64_rex_emit((inst), 0, 0, 0, (dregp)); \
  5531  		switch((size)) \
  5532  		{ \
  5533  			case 2: \
  5534  			{ \
  5535  				*(inst)++ = (unsigned char)0xdf; \
  5536  				x86_64_regp_emit((inst), 1, (dregp)); \
  5537  			} \
  5538  			break; \
  5539  			case 4: \
  5540  			{ \
  5541  				*(inst)++ = (unsigned char)0xdb; \
  5542  				x86_64_regp_emit((inst), 1, (dregp)); \
  5543  			} \
  5544  			break; \
  5545  			case 8: \
  5546  			{ \
  5547  				*(inst)++ = (unsigned char)0xdd; \
  5548  				x86_64_regp_emit((inst), 1, (dregp)); \
  5549  			} \
  5550  			break; \
  5551  		} \
  5552  	} while(0)
  5553  
  5554  #define x86_64_fisttp_mem_size(inst, mem, size) \
  5555  	do { \
  5556  		switch((size)) \
  5557  		{ \
  5558  			case 2: \
  5559  			{ \
  5560  				*(inst)++ = (unsigned char)0xdf; \
  5561  				x86_64_mem_emit((inst), 1, (mem)); \
  5562  			} \
  5563  			break; \
  5564  			case 4: \
  5565  			{ \
  5566  				*(inst)++ = (unsigned char)0xdb; \
  5567  				x86_64_mem_emit((inst), 1, (mem)); \
  5568  			} \
  5569  			break; \
  5570  			case 8: \
  5571  			{ \
  5572  				*(inst)++ = (unsigned char)0xdd; \
  5573  				x86_64_mem_emit((inst), 1, (mem)); \
  5574  			} \
  5575  			break; \
  5576  		} \
  5577  	} while(0)
  5578  
  5579  #define x86_64_fisttp_membase_size(inst, basereg, disp, size) \
  5580  	do { \
  5581  		x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
  5582  		switch((size)) \
  5583  		{ \
  5584  			case 2: \
  5585  			{ \
  5586  				*(inst)++ = (unsigned char)0xdf; \
  5587  				x86_64_membase_emit((inst), 1, (basereg), (disp)); \
  5588  			} \
  5589  			break; \
  5590  			case 4: \
  5591  			{ \
  5592  				*(inst)++ = (unsigned char)0xdb; \
  5593  				x86_64_membase_emit((inst), 1, (basereg), (disp)); \
  5594  			} \
  5595  			break; \
  5596  			case 8: \
  5597  			{ \
  5598  				*(inst)++ = (unsigned char)0xdd; \
  5599  				x86_64_membase_emit((inst), 1, (basereg), (disp)); \
  5600  			} \
  5601  			break; \
  5602  		} \
  5603  	} while(0)
  5604  
  5605  #define x86_64_fabs(inst) \
  5606  	do { \
  5607  		*(inst)++ = (unsigned char)0xd9; \
  5608  		*(inst)++ = (unsigned char)0xe1; \
  5609  	} while(0)
  5610  
  5611  #define x86_64_fchs(inst)	\
  5612  	do {	\
  5613  		*(inst)++ = (unsigned char)0xd9;	\
  5614  		*(inst)++ = (unsigned char)0xe0;	\
  5615  	} while(0)
  5616  
  5617  /*
   5618   * Store the fpu control word without checking for pending unmasked fpu exceptions
  5619   */
  5620  #define x86_64_fnstcw(inst, mem) \
  5621  	do { \
  5622  		*(inst)++ = (unsigned char)0xd9; \
  5623  		x86_64_mem_emit((inst), 7, (mem)); \
  5624  	} while(0)
  5625  
  5626  #define x86_64_fnstcw_membase(inst, basereg, disp) \
  5627  	do { \
  5628  		*(inst)++ = (unsigned char)0xd9; \
  5629  		x86_64_membase_emit((inst), 7, (basereg), (disp)); \
  5630  	} while(0)
  5631  
  5632  /*
  5633   * Load fpu control word
  5634   */
  5635  #define x86_64_fldcw(inst, mem) \
  5636  	do { \
  5637  		*(inst)++ = (unsigned char)0xd9; \
  5638  		x86_64_mem_emit((inst), 5, (mem)); \
  5639  	} while(0)
  5640  
  5641  #define x86_64_fldcw_membase(inst, basereg, disp) \
  5642  	do { \
  5643  		*(inst)++ = (unsigned char)0xd9; \
   5644  		x86_64_membase_emit((inst), 5, (basereg), (disp)); \
  5645  	} while(0)
  5646  
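         /*
          * Usage sketch (illustrative): on CPUs without the SSE 3 fisttp
          * instruction, a truncating conversion has to force the
          * round-toward-zero mode (RC bits 10-11 set) in the control word
          * around the fistp, roughly (scratch stack slots assumed):
          *
          *     x86_64_fnstcw_membase(inst, X86_64_RSP, -8);
          *     (load the saved word, set bits 10-11, store it at -16(%rsp))
          *     x86_64_fldcw_membase(inst, X86_64_RSP, -16);
          *     x86_64_fistp_membase_size(inst, X86_64_RSP, -24, 8);
          *     x86_64_fldcw_membase(inst, X86_64_RSP, -8);
          */
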
  5647  #ifdef	__cplusplus
   5648  }
  5649  #endif
  5650  
  5651  #endif /* _JIT_GEN_X86_64_H */