github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-rules-x86-64.ins (about)

     1  /*
     2   * jit-rules-x86-64.ins - Instruction selector for x86_64.
     3   *
     4   * Copyright (C) 2008  Southern Storm Software, Pty Ltd.
     5   *
     6   * This file is part of the libjit library.
     7   *
     8   * The libjit library is free software: you can redistribute it and/or
     9   * modify it under the terms of the GNU Lesser General Public License
    10   * as published by the Free Software Foundation, either version 2.1 of
    11   * the License, or (at your option) any later version.
    12   *
    13   * The libjit library is distributed in the hope that it will be useful,
    14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    16   * Lesser General Public License for more details.
    17   *
    18   * You should have received a copy of the GNU Lesser General Public
    19   * License along with the libjit library.  If not, see
    20   * <http://www.gnu.org/licenses/>.
    21   */
    22  
    23  %regclass reg x86_64_reg
    24  %regclass creg x86_64_creg
    25  %regclass dreg x86_64_dreg
    26  %regclass rreg x86_64_rreg
    27  %regclass sreg x86_64_sreg
    28  %regclass freg x86_64_freg
    29  %regclass xreg x86_64_xreg
    30  
    31  /*
    32   * Conversion opcodes.
    33   */
    34  
    35  JIT_OP_TRUNC_SBYTE:
    36  	[=reg, reg] -> {
        		/* Emit a sign-extending move (movsx) of the low 8 bits of $2 into $1, with a 4-byte operand size. */
    37  		x86_64_movsx8_reg_reg_size(inst, $1, $2, 4);
    38  	}
    39  
    40  JIT_OP_TRUNC_UBYTE:
    41  	[=reg, reg] -> {
        		/* Emit a zero-extending move (movzx) of the low 8 bits of $2 into $1, with a 4-byte operand size. */
    42  		x86_64_movzx8_reg_reg_size(inst, $1, $2, 4);
    43  	}
    44  
    45  JIT_OP_TRUNC_SHORT:
    46  	[=reg, reg] -> {
        		/* Emit a sign-extending move (movsx) of the low 16 bits of $2 into $1, with a 4-byte operand size. */
    47  		x86_64_movsx16_reg_reg_size(inst, $1, $2, 4);
    48  	}
    49  
    50  JIT_OP_TRUNC_USHORT:
    51  	[=reg, reg] -> {
        		/* Emit a zero-extending move (movzx) of the low 16 bits of $2 into $1, with a 4-byte operand size. */
    52  		x86_64_movzx16_reg_reg_size(inst, $1, $2, 4);
    53  	}
    54  
    55  JIT_OP_TRUNC_INT:
    56  	[=reg, reg] -> {
        		/* Copy $2 to $1 as a 4-byte value; nothing is emitted when both operands are the same register. */
    57  		if($1 != $2)
    58  		{
    59  			x86_64_mov_reg_reg_size(inst, $1, $2, 4);
    60  		}
    61  	}
    62  
    63  JIT_OP_TRUNC_UINT:
    64  	[=reg, reg] -> {
        		/* Same code as JIT_OP_TRUNC_INT: a 4-byte register move that is skipped when $1 and $2 coincide. */
    65  		if($1 != $2)
    66  		{
    67  			x86_64_mov_reg_reg_size(inst, $1, $2, 4);
    68  		}
    69  	}
    70  
    71  JIT_OP_LOW_WORD:
    72  	[=reg, imm] -> {
        		/* Constant operand: load the immediate $2 into $1 with a 4-byte operand size. */
    73  		x86_64_mov_reg_imm_size(inst, $1, $2, 4);
    74  	}
    75  	[=reg, local] -> {
        		/* Frame operand: load 4 bytes from RBP + $2 into $1. */
    76  		x86_64_mov_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
    77  	}
    78  	[=reg, reg] -> {
        		/* Register operand: 4-byte register move, omitted when $1 and $2 are the same register. */
    79  		if($1 != $2)
    80  		{
    81  			x86_64_mov_reg_reg_size(inst, $1, $2, 4);
    82  		}
    83  	}
    84  
    85  JIT_OP_EXPAND_INT:
    86  	[=reg, reg] -> {
        		/* Emit a sign-extending move of the 32-bit value in $2 into $1, with an 8-byte operand size. */
    87  		x86_64_movsx32_reg_reg_size(inst, $1, $2, 8);
    88  	}
    89  
    90  JIT_OP_EXPAND_UINT:
    91  	[=reg, reg] -> {
        		/*
        		 * A 4-byte register move, emitted even when $1 == $2 (no guard as in
        		 * JIT_OP_TRUNC_INT). On x86-64 a 32-bit register write clears the upper
        		 * 32 bits, which is presumably what provides the zero extension here.
        		 */
    92  		x86_64_mov_reg_reg_size(inst, $1, $2, 4);
    93  	}
    94  
    95  JIT_OP_INT_TO_NFLOAT:
    96  	[=freg, local] -> {
        		/* Frame operand: fild reads the 4-byte integer directly from RBP + $2. */
    97  		x86_64_fild_membase_size(inst, X86_64_RBP, $2, 4);
    98  	}
    99  	[=freg, reg] -> {
        		/*
        		 * Register operand: fild is only used with a memory operand here, so the
        		 * register is first written to memory. With HAVE_RED_ZONE it is stored
        		 * at RSP - 8 without moving RSP; otherwise it is pushed, loaded from the
        		 * top of the stack, and the slot is released again by adding
        		 * sizeof(jit_nint) to RSP.
        		 */
   100  #ifdef HAVE_RED_ZONE
   101  		x86_64_mov_membase_reg_size(inst, X86_64_RSP, -8, $2, 4);
   102  		x86_64_fild_membase_size(inst, X86_64_RSP, -8, 4);
   103  #else
   104  		x86_64_push_reg_size(inst, $2, 8);
   105  		x86_64_fild_membase_size(inst, X86_64_RSP, 0, 4);
   106  		x86_64_add_reg_imm_size(inst, X86_64_RSP, sizeof(jit_nint), 8);
   107  #endif
   108  	}
   109  
   110  JIT_OP_LONG_TO_NFLOAT:
   111  	[=freg, local] -> {
        		/* Frame operand: fild reads the 8-byte integer directly from RBP + $2. */
   112  		x86_64_fild_membase_size(inst, X86_64_RBP, $2, 8);
   113  	}
   114  	[=freg, reg] -> {
        		/*
        		 * Register operand: same spill-then-fild sequence as JIT_OP_INT_TO_NFLOAT,
        		 * but with 8-byte stores and loads. With HAVE_RED_ZONE the slot is at
        		 * RSP - 8; otherwise the register is pushed and the slot popped afterwards.
        		 */
   115  #ifdef HAVE_RED_ZONE
   116  		x86_64_mov_membase_reg_size(inst, X86_64_RSP, -8, $2, 8);
   117  		x86_64_fild_membase_size(inst, X86_64_RSP, -8, 8);
   118  #else
   119  		x86_64_push_reg_size(inst, $2, 8);
   120  		x86_64_fild_membase_size(inst, X86_64_RSP, 0, 8);
   121  		x86_64_add_reg_imm_size(inst, X86_64_RSP, sizeof(jit_nint), 8);
   122  #endif
   123  	}
   124  
   125  JIT_OP_FLOAT32_TO_INT:
   126  	[=reg, local] -> {
        		/* cvttss2si (truncating float32-to-integer conversion) from RBP + $2 into $1, 4-byte result. */
   127  		x86_64_cvttss2si_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
   128  	}
   129  	[=reg, xreg] -> {
        		/* Same conversion with the source in SSE register $2. */
   130  		x86_64_cvttss2si_reg_reg_size(inst, $1, $2, 4);
   131  	}
   132  
   133  JIT_OP_FLOAT32_TO_UINT:
   134  	[=reg, local] -> {
        		/*
        		 * Uses the 8-byte form of cvttss2si, exactly like JIT_OP_FLOAT32_TO_LONG.
        		 * NOTE(review): presumably the wider signed conversion is used so that
        		 * values above the signed 32-bit range still convert, with the caller
        		 * taking the low 32 bits - confirm.
        		 */
   135  		x86_64_cvttss2si_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
   136  	}
   137  	[=reg, xreg] -> {
        		/* Same 8-byte conversion with the source in SSE register $2. */
   138  		x86_64_cvttss2si_reg_reg_size(inst, $1, $2, 8);
   139  	}
   140  
   141  JIT_OP_FLOAT32_TO_LONG:
   142  	[=reg, local] -> {
        		/* cvttss2si (truncating float32-to-integer conversion) from RBP + $2 into $1, 8-byte result. */
   143  		x86_64_cvttss2si_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
   144  	}
   145  	[=reg, xreg] -> {
        		/* Same conversion with the source in SSE register $2. */
   146  		x86_64_cvttss2si_reg_reg_size(inst, $1, $2, 8);
   147  	}
   148  
   149  JIT_OP_INT_TO_FLOAT32:
   150  	[=xreg, local] -> {
        		/* cvtsi2ss (integer-to-float32 conversion) of the 4-byte integer at RBP + $2 into $1. */
   151  		x86_64_cvtsi2ss_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
   152  	}
   153  	[=xreg, reg] -> {
        		/* Same conversion with the 4-byte source in general register $2. */
   154  		x86_64_cvtsi2ss_reg_reg_size(inst, $1, $2, 4);
   155  	}
   156  
   157  JIT_OP_UINT_TO_FLOAT32:
   158  	[=xreg, reg] -> {
        		/*
        		 * The 4-byte move of $2 onto itself modifies the input register: on
        		 * x86-64 a 32-bit register write clears the upper 32 bits. The value is
        		 * then converted with the 8-byte (signed) form of cvtsi2ss, so the
        		 * unsigned 32-bit input is presumably seen as a non-negative 64-bit one.
        		 * There is no frame-operand clause for this opcode.
        		 */
   159  		x86_64_mov_reg_reg_size(inst, $2, $2, 4);
   160  		x86_64_cvtsi2ss_reg_reg_size(inst, $1, $2, 8);
   161  	}
   162  
   163  JIT_OP_LONG_TO_FLOAT32:
   164  	[=xreg, local] -> {
        		/* cvtsi2ss (integer-to-float32 conversion) of the 8-byte integer at RBP + $2 into $1. */
   165  		x86_64_cvtsi2ss_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
   166  	}
   167  	[=xreg, reg] -> {
        		/* Same conversion with the 8-byte source in general register $2. */
   168  		x86_64_cvtsi2ss_reg_reg_size(inst, $1, $2, 8);
   169  	}
   170  
   171  JIT_OP_FLOAT64_TO_FLOAT32:
   172  	[=xreg, local] -> {
        		/* cvtsd2ss (float64-to-float32 conversion) of the value at RBP + $2 into $1. */
   173  		x86_64_cvtsd2ss_reg_membase(inst, $1, X86_64_RBP, $2);
   174  	}
   175  	[=xreg, xreg] -> {
        		/* Same conversion with the source in SSE register $2. */
   176  		x86_64_cvtsd2ss_reg_reg(inst, $1, $2);
   177  	}
   178  
   179  JIT_OP_FLOAT64_TO_INT:
   180  	[=reg, local] -> {
        		/* cvttsd2si (truncating float64-to-integer conversion) from RBP + $2 into $1, 4-byte result. */
   181  		x86_64_cvttsd2si_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
   182  	}
   183  	[=reg, xreg] -> {
        		/* Same conversion with the source in SSE register $2. */
   184  		x86_64_cvttsd2si_reg_reg_size(inst, $1, $2, 4);
   185  	}
   186  
   187  JIT_OP_FLOAT64_TO_UINT:
   188  	[=reg, local] -> {
        		/*
        		 * Uses the 8-byte form of cvttsd2si, exactly like JIT_OP_FLOAT64_TO_LONG.
        		 * NOTE(review): presumably so that values above the signed 32-bit range
        		 * still convert, with the low 32 bits taken as the result - confirm.
        		 */
   189  		x86_64_cvttsd2si_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
   190  	}
   191  	[=reg, xreg] -> {
        		/* Same 8-byte conversion with the source in SSE register $2. */
   192  		x86_64_cvttsd2si_reg_reg_size(inst, $1, $2, 8);
   193  	}
   194  
   195  JIT_OP_FLOAT64_TO_LONG:
   196  	[=reg, local] -> {
        		/* cvttsd2si (truncating float64-to-integer conversion) from RBP + $2 into $1, 8-byte result. */
   197  		x86_64_cvttsd2si_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
   198  	}
   199  	[=reg, xreg] -> {
        		/* Same conversion with the source in SSE register $2. */
   200  		x86_64_cvttsd2si_reg_reg_size(inst, $1, $2, 8);
   201  	}
   202  
   203  JIT_OP_INT_TO_FLOAT64:
   204  	[=xreg, local] -> {
        		/* cvtsi2sd (integer-to-float64 conversion) of the 4-byte integer at RBP + $2 into $1. */
   205  		x86_64_cvtsi2sd_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
   206  	}
   207  	[=xreg, reg] -> {
        		/* Same conversion with the 4-byte source in general register $2. */
   208  		x86_64_cvtsi2sd_reg_reg_size(inst, $1, $2, 4);
   209  	}
   210  
   211  JIT_OP_UINT_TO_FLOAT64:
   212  	[=xreg, reg] -> {
        		/*
        		 * Same scheme as JIT_OP_UINT_TO_FLOAT32: the 4-byte self-move rewrites
        		 * the input register $2 (clearing its upper 32 bits on x86-64), then the
        		 * 8-byte form of cvtsi2sd converts it. Only a register clause exists.
        		 */
   213  		x86_64_mov_reg_reg_size(inst, $2, $2, 4);
   214  		x86_64_cvtsi2sd_reg_reg_size(inst, $1, $2, 8);
   215  	}
   216  
   217  JIT_OP_LONG_TO_FLOAT64:
   218  	[=xreg, local] -> {
        		/* cvtsi2sd (integer-to-float64 conversion) of the 8-byte integer at RBP + $2 into $1. */
   219  		x86_64_cvtsi2sd_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
   220  	}
   221  	[=xreg, reg] -> {
        		/* Same conversion with the 8-byte source in general register $2. */
   222  		x86_64_cvtsi2sd_reg_reg_size(inst, $1, $2, 8);
   223  	}
   224  
   225  JIT_OP_FLOAT32_TO_FLOAT64:
   226  	[=xreg, local] -> {
        		/* cvtss2sd (float32-to-float64 conversion) of the value at RBP + $2 into $1. */
   227  		x86_64_cvtss2sd_reg_membase(inst, $1, X86_64_RBP, $2);
   228  	}
   229  	[=xreg, xreg] -> {
        		/* Same conversion with the source in SSE register $2. */
   230  		x86_64_cvtss2sd_reg_reg(inst, $1, $2);
   231  	}
   232  
   233  JIT_OP_NFLOAT_TO_INT: stack
   234  	[=reg, freg, scratch reg] -> {
        		/*
        		 * Delegates to the helper x86_64_nfloat_to_int (defined outside this
        		 * view) with destination $1, scratch register $3 and result size 4.
        		 * The x87 operand $2 is not passed explicitly; presumably the helper
        		 * works on the top of the x87 stack - confirm against its definition.
        		 */
   235  		inst = x86_64_nfloat_to_int(inst, $1, $3, 4);
   236  	}
   237  
   238  JIT_OP_NFLOAT_TO_LONG: stack
   239  	[=reg, freg, scratch reg] -> {
        		/*
        		 * Same helper call as JIT_OP_NFLOAT_TO_INT, but with result size 8.
        		 * The x87 operand $2 is again not passed explicitly.
        		 */
   240  		inst = x86_64_nfloat_to_int(inst, $1, $3, 8);
   241  	}
   242  
   243  JIT_OP_FLOAT32_TO_NFLOAT:
   244  	[=freg, local] -> {
        		/* Frame operand: fld loads the 4-byte float directly from RBP + $2. */
   245  		x86_64_fld_membase_size(inst, X86_64_RBP, $2, 4);
   246  	}
   247  	[=freg, xreg] -> {
        		/*
        		 * SSE register operand: the value goes through memory. With
        		 * HAVE_RED_ZONE it is stored at RSP - 8 and loaded from there; otherwise
        		 * 8 bytes are reserved on the stack, used as the temporary, and released.
        		 */
   248  #ifdef HAVE_RED_ZONE
   249  		x86_64_movss_membase_reg(inst, X86_64_RSP, -8, $2);
   250  		x86_64_fld_membase_size(inst, X86_64_RSP, -8, 4);
   251  #else
   252  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   253  		x86_64_movss_regp_reg(inst, X86_64_RSP, $2);
   254  		x86_64_fld_regp_size(inst, X86_64_RSP, 4);
   255  		x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
   256  #endif
   257  	}
   258  
   259  JIT_OP_FLOAT64_TO_NFLOAT:
   260  	[=freg, local] -> {
        		/* Frame operand: fld loads the 8-byte float directly from RBP + $2. */
   261  		x86_64_fld_membase_size(inst, X86_64_RBP, $2, 8);
   262  	}
   263  	[=freg, xreg] -> {
        		/*
        		 * SSE register operand: same store-then-fld sequence as
        		 * JIT_OP_FLOAT32_TO_NFLOAT, using movsd and an 8-byte fld.
        		 */
   264  #ifdef HAVE_RED_ZONE
   265  		x86_64_movsd_membase_reg(inst, X86_64_RSP, -8, $2);
   266  		x86_64_fld_membase_size(inst, X86_64_RSP, -8, 8);
   267  #else
   268  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   269  		x86_64_movsd_regp_reg(inst, X86_64_RSP, $2);
   270  		x86_64_fld_regp_size(inst, X86_64_RSP, 8);
   271  		x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
   272  #endif
   273  	}
   274  
   275  JIT_OP_NFLOAT_TO_FLOAT32: stack
   276  	[=local, freg] -> {
        		/* Frame destination: fstp stores a 4-byte float at RBP + $1. */
   277  		x86_64_fstp_membase_size(inst, X86_64_RBP, $1, 4);
   278  	}
   279  	[=xreg, freg] -> {
        		/*
        		 * SSE register destination: the value is stored to a temporary 4-byte
        		 * slot with fstp and reloaded into $1 with movss.
        		 */
   280  #ifdef HAVE_RED_ZONE
   281  		/* Avoid modifying the stack pointer by simply using negative */
   282  		/* offsets here. */
   283  		x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 4);
   284  		x86_64_movss_reg_membase(inst, $1, X86_64_RSP, -8);
   285  #else
        		/* No red zone: reserve 8 bytes below RSP for the temporary and release them afterwards. */
   286  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   287  		x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
   288  		x86_64_movss_reg_regp(inst, $1, X86_64_RSP);
   289  		x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
   290  #endif
   291  	}
   292  
   293  JIT_OP_NFLOAT_TO_FLOAT64: stack
   294  	[=local, freg] -> {
        		/* Frame destination: fstp stores an 8-byte float at RBP + $1. */
   295  		x86_64_fstp_membase_size(inst, X86_64_RBP, $1, 8);
   296  	}
   297  	[=xreg, freg] -> {
        		/*
        		 * SSE register destination: the value is stored to a temporary 8-byte
        		 * slot with fstp and reloaded into $1 with movsd.
        		 */
   298  #ifdef HAVE_RED_ZONE
   299  		/* Avoid modifying the stack pointer by simply using negative */
   300  		/* offsets here. */
   301  		x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 8);
   302  		x86_64_movsd_reg_membase(inst, $1, X86_64_RSP, -8);
   303  #else
        		/* No red zone: reserve 8 bytes below RSP for the temporary and release them afterwards. */
   304  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   305  		x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
   306  		x86_64_movsd_reg_regp(inst, $1, X86_64_RSP);
   307  		x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
   308  #endif
   309  	}
   310  
   311  /*
   312   * Data manipulation.
   313   */
   314  
   315  JIT_OP_COPY_LOAD_SBYTE, JIT_OP_COPY_LOAD_UBYTE, JIT_OP_COPY_STORE_BYTE: copy
   316  	[=local, imm] -> {
        		/* Store the immediate $2 as a single byte at RBP + $1. */
   317  		x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 1);
   318  	}
   319  	[=local, reg] -> {
        		/* Store one byte from register $2 at RBP + $1. */
   320  		x86_64_mov_membase_reg_size(inst, X86_64_RBP, $1, $2, 1);
   321  	}
        	/* Register-only pattern: the body is empty, so this clause emits no instruction of its own. */
   322  	[reg] -> {}
   323  
   324  JIT_OP_COPY_LOAD_SHORT, JIT_OP_COPY_LOAD_USHORT, JIT_OP_COPY_STORE_SHORT: copy
   325  	[=local, imm] -> {
        		/* Store the immediate $2 as a 2-byte value at RBP + $1. */
   326  		x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 2);
   327  	}
   328  	[=local, reg] -> {
        		/* Store two bytes from register $2 at RBP + $1. */
   329  		x86_64_mov_membase_reg_size(inst, X86_64_RBP, $1, $2, 2);
   330  	}
        	/* Register-only pattern: the body is empty, so this clause emits no instruction of its own. */
   331  	[reg] -> {}
   332  
   333  JIT_OP_COPY_INT: copy
   334  	[=local, imm] -> {
        		/* Store the immediate $2 as a 4-byte value at RBP + $1. */
   335  		x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 4);
   336  	}
        	/* Register-only pattern: the body is empty, so this clause emits no instruction of its own. */
   337  	[reg] -> {}
   338  
   339  JIT_OP_COPY_LONG: copy
   340  	[=local, imms32] -> {
        		/* Only matches "imms32" constants; stores $2 as an 8-byte value at RBP + $1. */
   341  		x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 8);
   342  	}
        	/* Register-only pattern: the body is empty, so this clause emits no instruction of its own. */
   343  	[reg] -> {}
   344  
   345  JIT_OP_COPY_FLOAT32: copy
   346  	[=local, xreg] -> {
        		/* Store the float32 in SSE register $2 at RBP + $1 with movss. */
   347  		x86_64_movss_membase_reg(inst, X86_64_RBP, $1, $2);
   348  	}
        	/* SSE-register-only pattern: the body is empty, so this clause emits no instruction of its own. */
   349  	[xreg] -> {}
   350  
   351  JIT_OP_COPY_FLOAT64: copy
   352  	[=local, xreg] -> {
        		/* Store the float64 in SSE register $2 at RBP + $1 with movsd. */
   353  		x86_64_movsd_membase_reg(inst, X86_64_RBP, $1, $2);
   354  	}
        	/* SSE-register-only pattern: the body is empty, so this clause emits no instruction of its own. */
   355  	[xreg] -> {}
   356  
   357  JIT_OP_COPY_NFLOAT: copy, stack
        	/* Single x87-register pattern with an empty body: this rule emits no instruction of its own. */
   358  	[freg] -> {}
   359  
   360  JIT_OP_COPY_STRUCT:
   361  	[=frame, frame, scratch reg, scratch xreg,
   362  		if("jit_type_get_size(jit_value_get_type(insn->dest)) <= _JIT_MAX_MEMCPY_INLINE")] -> {
        		/*
        		 * Selected only when the destination type's size is at most
        		 * _JIT_MAX_MEMCPY_INLINE: copy from RBP + $2 to RBP + $1 through the
        		 * helper small_struct_copy, handing it the scratch registers $3 and $4.
        		 */
   363  		inst = small_struct_copy(gen, inst, X86_64_RBP, $1, X86_64_RBP, $2,
   364  								 jit_value_get_type(insn->dest), $3, $4);
   365  	}
   366  	[=frame, frame, clobber(creg), clobber(xreg)] -> {
        		/* Fallback for all other sizes: copy the destination type's size in bytes through the helper memory_copy. */
   367  		inst = memory_copy(gen, inst, X86_64_RBP, $1, X86_64_RBP, $2,
   368  				   jit_type_get_size(jit_value_get_type(insn->dest)));
   369  	}
   370  
   371  JIT_OP_ADDRESS_OF:
   372  	[=reg, frame] -> {
        		/* lea computes the address RBP + $2 of the frame slot into $1 (8-byte result). */
   373  		x86_64_lea_membase_size(inst, $1, X86_64_RBP, $2, 8);
   374  	}
   375  
   376  /*
   377   * Stack pushes and pops.
   378   */
   379  
   380  JIT_OP_INCOMING_REG, JIT_OP_RETURN_REG: note
   381          [reg] -> {
   382  		/*
   383  		 * This rule does nothing itself. Also at this point
   384  		 * the value is supposed to be already in the register
   385  		 * so the "reg" pattern does not load it either. But
   386  		 * it allows the allocator to check the liveness flags
   387  		 * and free the register if the value is dead.
   388  		 */
        		/* The body contains comments only, so no machine code is emitted for these two opcodes. */
   389  	}
   390  
   391  JIT_OP_RETRIEVE_FRAME_POINTER: note
   392  	[=reg] -> {
        		/* Copy the current frame pointer (RBP) into the result register $1 as an 8-byte value. */
   393  		x86_64_mov_reg_reg_size(inst, $1, X86_64_RBP, 8);
   394  	}
   395  
   396  JIT_OP_PUSH_INT: note
   397  	[imm] -> {
        		/* Push the constant; every clause also records in gen->stack_changed that RSP was modified. */
   398  		x86_64_push_imm(inst, $1);
   399  		gen->stack_changed = 1;
   400  	}
   401  	[local] -> {
        		/* Push the value stored at RBP + $1 (emitter called with size 4). */
   402  		x86_64_push_membase_size(inst, X86_64_RBP, $1, 4);
   403  		gen->stack_changed = 1;
   404  	}
   405  	[reg] -> {
        		/* Push register $1 (emitter called with size 4). */
   406  		x86_64_push_reg_size(inst, $1, 4);
   407  		gen->stack_changed = 1;
   408  	}
   409  
   410  JIT_OP_PUSH_LONG: note
   411  	[imm] -> {
        		/*
        		 * Constants inside the jit_min_int..jit_max_int range are pushed with a
        		 * single push-immediate. Anything wider cannot be pushed that way, so an
        		 * 8-byte slot is reserved and the constant is written as two 32-bit
        		 * words: ptr[1] at RSP + 4 and ptr[0] at RSP + 0.
        		 */
   412  		if(($1 >= (jit_nint)jit_min_int) && ($1 <= (jit_nint)jit_max_int))
   413  		{
   414  			x86_64_push_imm(inst, $1);
   415  		}
   416  		else
   417  		{
   418  			jit_int *ptr = (jit_int *)&($1);
   419  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   420  			x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4);
   421  			x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4);
   422  		}
   423  		gen->stack_changed = 1;
   424  	}
   425  	[local] -> {
        		/* Push the 8-byte value stored at RBP + $1. */
   426  		x86_64_push_membase_size(inst, X86_64_RBP, $1, 8);
   427  		gen->stack_changed = 1;
   428  	}
   429  	[reg] -> {
        		/* Push the 8-byte register $1. */
   430  		x86_64_push_reg_size(inst, $1, 8);
   431  		gen->stack_changed = 1;
   432  	}
   433  
   434  JIT_OP_PUSH_FLOAT32: note
   435  	[imm] -> {
        		/*
        		 * $1 is cast directly to a pointer here (no address-of, unlike
        		 * JIT_OP_PUSH_LONG), i.e. it is treated as the address of the constant.
        		 * The first 32-bit word found there is pushed as an immediate.
        		 */
   436  		jit_int *ptr = (jit_int *)($1);
   437  		x86_64_push_imm_size(inst, ptr[0], 4);
   438  		gen->stack_changed = 1;
   439  	}
   440  	[local] -> {
        		/* Push the value stored at RBP + $1 (emitter called with size 4). */
   441  		x86_64_push_membase_size(inst, X86_64_RBP, $1, 4);
   442  		gen->stack_changed = 1;
   443  	}
   444  	[xreg] -> {
        		/* SSE registers cannot be pushed directly: reserve 8 bytes and store the float32 at RSP with movss. */
   445  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   446  		x86_64_movss_membase_reg(inst, X86_64_RSP, 0, $1);
   447  		gen->stack_changed = 1;
   448  	}
   449  
   450  JIT_OP_PUSH_FLOAT64: note
   451  	[imm] -> {
        		/*
        		 * $1 is used as the address of the constant. An 8-byte slot is reserved
        		 * and the constant is written as two 32-bit words: ptr[1] at RSP + 4
        		 * and ptr[0] at RSP + 0.
        		 */
   452  		jit_int *ptr = (jit_int *)($1);
   453  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   454  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4);
   455  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4);
   456  		gen->stack_changed = 1;
   457  	}
   458  	[local] -> {
        		/* Push the 8-byte value stored at RBP + $1. */
   459  		x86_64_push_membase_size(inst, X86_64_RBP, $1, 8);
   460  		gen->stack_changed = 1;
   461  	}
   462  	[xreg] -> {
        		/* Reserve 8 bytes and store the float64 from SSE register $1 at RSP with movsd. */
   463  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   464  		x86_64_movsd_membase_reg(inst, X86_64_RSP, 0, $1);
   465  		gen->stack_changed = 1;
   466  	}
   467  
   468  JIT_OP_PUSH_NFLOAT: note, stack
   469  	[imm] -> {
        		/*
        		 * $1 is used as the address of the constant. When jit_nfloat is wider
        		 * than jit_float64 a 16-byte slot is reserved and a third 32-bit word
        		 * (ptr[2]) is written at RSP + 8; otherwise only sizeof(jit_float64)
        		 * bytes are reserved. The first two words are written in both cases.
        		 */
   470  		jit_int *ptr = (jit_int *)($1);
   471  		if(sizeof(jit_nfloat) != sizeof(jit_float64))
   472  		{
   473  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
   474  			x86_64_mov_membase_imm_size(inst, X86_64_RSP, 8, ptr[2], 4);
   475  		}
   476  		else
   477  		{
   478  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, sizeof(jit_float64), 8);
   479  		}
   480  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4);
   481  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4);
   482  		gen->stack_changed = 1;
   483  	}
   484  	[local, scratch reg] -> {
        		/*
        		 * Memory-to-memory copy through the scratch register $2. In the wide
        		 * case 4 bytes from RBP + $1 + 8 are copied to RSP + 8 first; the low
        		 * 8 bytes from RBP + $1 are then copied to RSP + 0 in both cases.
        		 */
   485  		if(sizeof(jit_nfloat) != sizeof(jit_float64))
   486  		{
   487  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
   488  			x86_64_mov_reg_membase_size(inst, $2, X86_64_RBP, $1 + 8, 4);
   489  			x86_64_mov_membase_reg_size(inst, X86_64_RSP, 8, $2, 4);
   490  		}
   491  		else
   492  		{
   493  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
   494  		}
   495  		x86_64_mov_reg_membase_size(inst, $2, X86_64_RBP, $1, 8);
   496  		x86_64_mov_membase_reg_size(inst, X86_64_RSP, 0, $2, 8);
   497  		gen->stack_changed = 1;
   498  	}
   499  	[freg] -> {
        		/*
        		 * x87 operand: reserve the slot and store with fstp - 10 bytes into a
        		 * 16-byte slot in the wide case, 8 bytes into an 8-byte slot otherwise.
        		 */
   500  		if(sizeof(jit_nfloat) != sizeof(jit_float64))
   501  		{
   502  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
   503  			x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 10);
   504  		}
   505  		else
   506  		{
   507  			x86_64_sub_reg_imm_size(inst, X86_64_RSP, sizeof(jit_float64), 8);
   508  			x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 8);
   509  		}
   510  		gen->stack_changed = 1;
   511  	}
   512  
   513  JIT_OP_PUSH_STRUCT: note, more_space
   514  	[reg, if("((jit_nuint)jit_value_get_nint_constant(insn->value2)) <= 32")] -> {
        		/*
        		 * Small structures (constant size in insn->value2 of at most 32 bytes)
        		 * are pushed in 8-byte pieces read from the address in $1, starting with
        		 * the highest offset and ending with offset 0.
        		 */
   515  		jit_nuint size;
   516  		jit_nuint last_part;
   517  		size = (jit_nuint)jit_value_get_nint_constant(insn->value2);
   518  		last_part = size & 0x7;
   519  		if(last_part)
   520  		{
   521  			/* Handle the possible last part smaller than 8 bytes */
   522  			size -= last_part;
   523  
   524  			/* We don't care about the last not needed bytes */
        			/* NOTE(review): this 8-byte push reads up to 7 bytes beyond the end of the structure. */
   525  			x86_64_push_membase_size(inst, $1, size, 8);
   526  		}
   527  		/* Handle full multiple pointer sized parts */
        		/* size is a multiple of 8 at this point, so the unsigned countdown stops exactly at 0. */
   528  		while(size > 0)
   529  		{
   530  			size -= sizeof(void *);
   531  			x86_64_push_membase_size(inst, $1, size, 8);
   532  		}
   533  		gen->stack_changed = 1;
   534  	}
   535  	[reg, clobber(creg), clobber(xreg)] -> {
   536  		/* Handle arbitrary-sized structures */
        		/* Reserve ROUND_STACK(size) bytes below RSP, then copy size bytes from the address in $1 to RSP + 0. */
   537  		jit_nuint size;
   538  		size = (jit_nuint)jit_value_get_nint_constant(insn->value2);
   539  		/* TODO: Maybe we should check for sizes > 2GB? */
   540  		x86_64_sub_reg_imm_size(inst, X86_64_RSP, ROUND_STACK(size), 8);
   541  		inst = memory_copy(gen, inst, X86_64_RSP, 0, $1, 0, size);
   542  		gen->stack_changed = 1;
   543  	}
   544  
   545  JIT_OP_POP_STACK:
   546  	[] -> {
        		/* Release stack space by adding the constant held in insn->value1->address to RSP. */
   547  		x86_64_add_reg_imm_size(inst, X86_64_RSP, insn->value1->address, 8);
   548  		gen->stack_changed = 1;
   549  	}
   550  
   551  /*
   552   * Parameter passing via parameter area
   553   */
   554  
   555  JIT_OP_SET_PARAM_INT: note
   556  	[imm, imm] -> {
        		/* Store the constant $1 as a 4-byte value at RSP + $2 ($2 is the offset into the parameter area). */
   557  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2, $1, 4);
   558  	}
   559  	[reg, imm] -> {
        		/* Store 4 bytes of register $1 at RSP + $2. */
   560  		x86_64_mov_membase_reg_size(inst, X86_64_RSP, $2, $1, 4);
   561  	}
   562  
   563  JIT_OP_SET_PARAM_LONG: note
   564  	[imms32, imm] -> {
        		/* Constants matching "imms32" are stored at RSP + $2 with a single 8-byte store-immediate. */
   565  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2, $1, 8);
   566  	}
   567  	[imm, imm] -> {
        		/* Remaining constants are written as two 32-bit words: ptr[1] at RSP + $2 + 4 and ptr[0] at RSP + $2. */
   568  		jit_int *ptr = (jit_int *)&($1);
   569  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2 + 4, ptr[1], 4);
   570  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2, ptr[0], 4);
   571  	}
   572  	[reg, imm] -> {
        		/* Store the 8-byte register $1 at RSP + $2. */
   573  		x86_64_mov_membase_reg_size(inst, X86_64_RSP, $2, $1, 8);
   574  	}
   575  
   576  JIT_OP_SET_PARAM_FLOAT32: note
   577  	[imm, imm] -> {
        		/* $1 is used as the address of the constant; its first 32-bit word is stored at RSP + $2. */
   578  		jit_int *ptr = (jit_int *)($1);
   579  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2, ptr[0], 4);
   580  	}
   581  	[xreg, imm] -> {
        		/* Store the float32 in SSE register $1 at RSP + $2 with movss. */
   582  		x86_64_movss_membase_reg(inst, X86_64_RSP, $2, $1);
   583  	}
   584  
   585  JIT_OP_SET_PARAM_FLOAT64: note
   586  	[imm, imm] -> {
        		/* $1 is used as the address of the constant; it is stored as two 32-bit words at RSP + $2 + 4 and RSP + $2. */
   587  		jit_int *ptr = (jit_int *)($1);
   588  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2 + 4, ptr[1], 4);
   589  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2, ptr[0], 4);
   590  	}
   591  	[xreg, imm] -> {
        		/* Store the float64 in SSE register $1 at RSP + $2 with movsd. */
   592  		x86_64_movsd_membase_reg(inst, X86_64_RSP, $2, $1);
   593  	}
   594  
   595  JIT_OP_SET_PARAM_NFLOAT: note
   596  	[imm, imm] -> {
        		/*
        		 * $1 is used as the address of the constant. Two 32-bit words are always
        		 * stored at RSP + $2 and RSP + $2 + 4; a third word (ptr[2]) is stored
        		 * at RSP + $2 + 8 only when jit_nfloat differs in size from jit_float64.
        		 */
   597  		jit_int *ptr = (jit_int *)($1);
   598  		if(sizeof(jit_nfloat) != sizeof(jit_float64))
   599  		{
   600  			x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2 + 8, ptr[2], 4);
   601  		}
   602  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2 + 4, ptr[1], 4);
   603  		x86_64_mov_membase_imm_size(inst, X86_64_RSP, $2, ptr[0], 4);
   604  	}
   605  	[freg, imm] -> {
        		/* x87 operand: fstp stores 10 bytes at RSP + $2 in the wide case and 8 bytes otherwise. */
   606  		if(sizeof(jit_nfloat) != sizeof(jit_float64))
   607  		{
   608  			x86_64_fstp_membase_size(inst, X86_64_RSP, $2, 10);
   609  		}
   610  		else
   611  		{
   612  			x86_64_fstp_membase_size(inst, X86_64_RSP, $2, 8);
   613  		}
   614  	}
   615  
   616  JIT_OP_SET_PARAM_STRUCT: note
   617  	[reg, imm, clobber(creg), clobber(xreg)] -> {
   618  		/* Handle arbitrary-sized structures */
        		/*
        		 * The destination offset comes from the constant in insn->dest rather
        		 * than from a pattern operand. $2 bytes are copied from the address in
        		 * $1 to RSP + offset through the helper memory_copy.
        		 */
   619  		jit_nint offset = jit_value_get_nint_constant(insn->dest);
   620  		/* TODO: Maybe we should check for sizes > 2GB? */
   621  		inst = memory_copy(gen, inst, X86_64_RSP, offset, $1, 0, $2);
   622  	}
   623  
   624  
   625  /*
   626   * Opcodes to handle return values
   627   */
   628  
   629  JIT_OP_FLUSH_SMALL_STRUCT:
   630  	[] -> {
        		/* No pattern operands: all code comes from the helper flush_return_struct (defined outside this view), applied to insn->value1. */
   631  		inst = flush_return_struct(inst, insn->value1);
   632  	}
   633  
   634  JIT_OP_RETURN:
   635  	[] -> {
        		/* Return without a value: the only code generated comes from jump_to_epilog (defined outside this view). */
   636  		inst = jump_to_epilog(gen, inst, block);
   637  	}
   638  
   639  JIT_OP_RETURN_INT: note
   640  	[reg("rax")] -> {
        		/* The pattern pins the operand to "rax"; the body itself only calls jump_to_epilog. */
   641  		inst = jump_to_epilog(gen, inst, block);
   642  	}
   643  
   644  JIT_OP_RETURN_LONG: note
   645  	[reg("rax")] -> {
        		/* The pattern pins the operand to "rax"; the body itself only calls jump_to_epilog. */
   646  		inst = jump_to_epilog(gen, inst, block);
   647  	}
   648  
   649  JIT_OP_RETURN_FLOAT32: note
   650  	[xreg("xmm0")] -> {
        		/* The pattern pins the operand to "xmm0"; the body itself only calls jump_to_epilog. */
   651  		inst = jump_to_epilog(gen, inst, block);
   652  	}
   653  
   654  JIT_OP_RETURN_FLOAT64: note
   655  	[xreg("xmm0")] -> {
        		/* The pattern pins the operand to "xmm0"; the body itself only calls jump_to_epilog. */
   656  		inst = jump_to_epilog(gen, inst, block);
   657  	}
   658  
   659  JIT_OP_RETURN_NFLOAT: note, stack
   660  	[freg, clobber(freg)] -> {
   661  		/* clobber(freg) frees all registers on the fp stack */
        		/* The value is matched as an x87 register; the body itself only calls jump_to_epilog. */
   662  		inst = jump_to_epilog(gen, inst, block);
   663  	}
   664  
   665  JIT_OP_RETURN_SMALL_STRUCT: note
   666  	[rreg, imm] -> {
        		/*
        		 * The helper return_struct (defined outside this view) is called with
        		 * func and the register $1; the immediate $2 is not referenced in this
        		 * body. Control then goes to the epilog via jump_to_epilog.
        		 */
   667  		inst = return_struct(inst, func, $1);
   668  		inst = jump_to_epilog(gen, inst, block);
   669  	}
   670  
   671  /*
   672   * Pointer-relative loads and stores.
   673   */
   674  
   675  JIT_OP_LOAD_RELATIVE_SBYTE:
   676  	[=reg, reg, imm] -> {
        		/*
        		 * Sign-extending byte load into $1 (8-byte destination). A zero offset
        		 * uses the displacement-free [$2] form; otherwise the [$2 + $3] form.
        		 */
   677  		if($3 == 0)
   678  		{
   679  			x86_64_movsx8_reg_regp_size(inst, $1, $2, 8);
   680  		}
   681  		else
   682  		{
   683  			x86_64_movsx8_reg_membase_size(inst, $1, $2, $3, 8);
   684  		}
   685  	}
   686  
   687  JIT_OP_LOAD_RELATIVE_UBYTE:
   688  	[=reg, reg, imm] -> {
        		/*
        		 * Zero-extending byte load into $1 (8-byte destination). A zero offset
        		 * uses the displacement-free [$2] form; otherwise the [$2 + $3] form.
        		 */
   689  		if($3 == 0)
   690  		{
   691  			x86_64_movzx8_reg_regp_size(inst, $1, $2, 8);
   692  		}
   693  		else
   694  		{
   695  			x86_64_movzx8_reg_membase_size(inst, $1, $2, $3, 8);
   696  		}
   697  	}
   698  
   699  JIT_OP_LOAD_RELATIVE_SHORT:
   700  	[=reg, reg, imm] -> {
        		/*
        		 * Sign-extending 16-bit load into $1 (8-byte destination). A zero offset
        		 * uses the displacement-free [$2] form; otherwise the [$2 + $3] form.
        		 */
   701  		if($3 == 0)
   702  		{
   703  			x86_64_movsx16_reg_regp_size(inst, $1, $2, 8);
   704  		}
   705  		else
   706  		{
   707  			x86_64_movsx16_reg_membase_size(inst, $1, $2, $3, 8);
   708  		}
   709  	}
   710  
   711  JIT_OP_LOAD_RELATIVE_USHORT:
   712  	[=reg, reg, imm] -> {
        		/*
        		 * Zero-extending 16-bit load into $1 (8-byte destination). A zero offset
        		 * uses the displacement-free [$2] form; otherwise the [$2 + $3] form.
        		 */
   713  		if($3 == 0)
   714  		{
   715  			x86_64_movzx16_reg_regp_size(inst, $1, $2, 8);
   716  		}
   717  		else
   718  		{
   719  			x86_64_movzx16_reg_membase_size(inst, $1, $2, $3, 8);
   720  		}
   721  	}
   722  
   723  JIT_OP_LOAD_RELATIVE_INT:
   724  	[=reg, reg, imm] -> {
        		/* 4-byte load into $1 from [$2] when the offset $3 is zero, otherwise from [$2 + $3]. */
   725  		if($3 == 0)
   726  		{
   727  			x86_64_mov_reg_regp_size(inst, $1, $2, 4);
   728  		}
   729  		else
   730  		{
   731  			x86_64_mov_reg_membase_size(inst, $1, $2, $3, 4);
   732  		}
   733  	}
   734  
   735  JIT_OP_LOAD_RELATIVE_LONG:
   736  	[=reg, reg, imm] -> {
        		/* 8-byte load into $1 from [$2] when the offset $3 is zero, otherwise from [$2 + $3]. */
   737  		if($3 == 0)
   738  		{
   739  			x86_64_mov_reg_regp_size(inst, $1, $2, 8);
   740  		}
   741  		else
   742  		{
   743  			x86_64_mov_reg_membase_size(inst, $1, $2, $3, 8);
   744  		}
   745  	}
   746  
   747  JIT_OP_LOAD_RELATIVE_FLOAT32:
   748  	[=xreg, reg, imm] -> {
        		/* movss load into SSE register $1 from [$2] when the offset $3 is zero, otherwise from [$2 + $3]. */
   749  		if($3 == 0)
   750  		{
   751  			x86_64_movss_reg_regp(inst, $1, $2);
   752  		}
   753  		else
   754  		{
   755  			x86_64_movss_reg_membase(inst, $1, $2, $3);
   756  		}
   757  	}
   758  
   759  JIT_OP_LOAD_RELATIVE_FLOAT64:
   760  	[=xreg, reg, imm] -> {
        		/* movsd load into SSE register $1 from [$2] when the offset $3 is zero, otherwise from [$2 + $3]. */
   761  		if($3 == 0)
   762  		{
   763  			x86_64_movsd_reg_regp(inst, $1, $2);
   764  		}
   765  		else
   766  		{
   767  			x86_64_movsd_reg_membase(inst, $1, $2, $3);
   768  		}
   769  	}
   770  
   771  JIT_OP_LOAD_RELATIVE_NFLOAT:
   772  	[=freg, reg, imm, if("sizeof(jit_nfloat) != sizeof(jit_float64)")] -> {
        		/* jit_nfloat differs in size from jit_float64: fld reads a 10-byte value from [$2 + $3]. */
   773  		x86_64_fld_membase_size(inst, $2, $3, 10);
   774  	}
   775  	[=freg, reg, imm, if("sizeof(jit_nfloat) == sizeof(jit_float64)")] -> {
        		/* jit_nfloat has the same size as jit_float64: fld reads an 8-byte value from [$2 + $3]. */
   776  		x86_64_fld_membase_size(inst, $2, $3, 8);
   777  	}
   778  
   779  JIT_OP_LOAD_RELATIVE_STRUCT: more_space
   780  	[=frame, reg, imm, scratch reg, scratch xreg,
   781  		if("jit_type_get_size(jit_value_get_type(insn->dest)) <= _JIT_MAX_MEMCPY_INLINE")] -> {
        		/*
        		 * Selected only when the destination type's size is at most
        		 * _JIT_MAX_MEMCPY_INLINE: copy from [$2 + $3] to the frame slot RBP + $1
        		 * through small_struct_copy, using scratch registers $4 and $5.
        		 */
   782  		inst = small_struct_copy(gen, inst, X86_64_RBP, $1, $2, $3,
   783  								 jit_value_get_type(insn->dest), $4, $5);
   784  	}
   785  	[=frame, reg, imm, clobber(creg), clobber(xreg)] -> {
        		/* Fallback for all other sizes: copy the destination type's size in bytes through memory_copy. */
   786  		inst = memory_copy(gen, inst, X86_64_RBP, $1, $2, $3,
   787  				   jit_type_get_size(jit_value_get_type(insn->dest)));
   788  	}
   789  
   790  JIT_OP_STORE_RELATIVE_BYTE: ternary
   791  	[reg, imm, imm] -> {
        		/* Store the constant $2 as one byte at [$1] when the offset $3 is zero, otherwise at [$1 + $3]. */
   792  		if($3 == 0)
   793  		{
   794  			x86_64_mov_regp_imm_size(inst, $1, $2, 1);
   795  		}
   796  		else
   797  		{
   798  			x86_64_mov_membase_imm_size(inst, $1, $3, $2, 1);
   799  		}
   800  	}
   801  	[reg, reg, imm] -> {
        		/* Store one byte from register $2 at [$1] or [$1 + $3], chosen the same way. */
   802  		if($3 == 0)
   803  		{
   804  			x86_64_mov_regp_reg_size(inst, $1, $2, 1);
   805  		}
   806  		else
   807  		{
   808  			x86_64_mov_membase_reg_size(inst, $1, $3, $2, 1);
   809  		}
   810  	}
   811  
   812  JIT_OP_STORE_RELATIVE_SHORT: ternary
   813  	[reg, imm, imm] -> {
        		/* Store the constant $2 as a 2-byte value at [$1] when the offset $3 is zero, otherwise at [$1 + $3]. */
   814  		if($3 == 0)
   815  		{
   816  			x86_64_mov_regp_imm_size(inst, $1, $2, 2);
   817  		}
   818  		else
   819  		{
   820  			x86_64_mov_membase_imm_size(inst, $1, $3, $2, 2);
   821  		}
   822  	}
   823  	[reg, reg, imm] -> {
        		/* Store two bytes from register $2 at [$1] or [$1 + $3], chosen the same way. */
   824  		if($3 == 0)
   825  		{
   826  			x86_64_mov_regp_reg_size(inst, $1, $2, 2);
   827  		}
   828  		else
   829  		{
   830  			x86_64_mov_membase_reg_size(inst, $1, $3, $2, 2);
   831  		}
   832  	}
   833  
   834  JIT_OP_STORE_RELATIVE_INT: ternary
   835  	[reg, imm, imm] -> {
        		/* Store the constant $2 as a 4-byte value at [$1] when the offset $3 is zero, otherwise at [$1 + $3]. */
   836  		if($3 == 0)
   837  		{
   838  			x86_64_mov_regp_imm_size(inst, $1, $2, 4);
   839  		}
   840  		else
   841  		{
   842  			x86_64_mov_membase_imm_size(inst, $1, $3, $2, 4);
   843  		}
   844  	}
   845  	[reg, reg, imm] -> {
        		/* Store four bytes from register $2 at [$1] or [$1 + $3], chosen the same way. */
   846  		if($3 == 0)
   847  		{
   848  			x86_64_mov_regp_reg_size(inst, $1, $2, 4);
   849  		}
   850  		else
   851  		{
   852  			x86_64_mov_membase_reg_size(inst, $1, $3, $2, 4);
   853  		}
   854  	}
   855  
   856  JIT_OP_STORE_RELATIVE_LONG: ternary
   857  	[reg, imms32, imm] -> {
        		/*
        		 * Only "imms32" constants are handled as immediates; they are stored as
        		 * an 8-byte value at [$1] when $3 is zero, otherwise at [$1 + $3]. There
        		 * is no clause for wider constants in this rule.
        		 */
   858  		if($3 == 0)
   859  		{
   860  			x86_64_mov_regp_imm_size(inst, $1, $2, 8);
   861  		}
   862  		else
   863  		{
   864  			x86_64_mov_membase_imm_size(inst, $1, $3, $2, 8);
   865  		}
   866  	}
   867  	[reg, reg, imm] -> {
        		/* Store the 8-byte register $2 at [$1] or [$1 + $3], chosen the same way. */
   868  		if($3 == 0)
   869  		{
   870  			x86_64_mov_regp_reg_size(inst, $1, $2, 8);
   871  		}
   872  		else
   873  		{
   874  			x86_64_mov_membase_reg_size(inst, $1, $3, $2, 8);
   875  		}
   876  	}
   877  
   878  JIT_OP_STORE_RELATIVE_FLOAT32: ternary
   879  	[reg, imm, imm] -> {
        		/*
        		 * $2 is used as the address of the float32 constant; its 32-bit word is
        		 * stored with an integer store-immediate at [$1] or [$1 + $3].
        		 */
   880  		if($3 == 0)
   881  		{
   882  			x86_64_mov_regp_imm_size(inst, $1, ((jit_int *)($2))[0], 4);
   883  		}
   884  		else
   885  		{
   886  			x86_64_mov_membase_imm_size(inst, $1, $3, ((jit_int *)($2))[0], 4);
   887  		}
   888  	}
   889  	[reg, xreg, imm] -> {
        		/* Store the float32 in SSE register $2 with movss at [$1] or [$1 + $3]. */
   890  		if($3 == 0)
   891  		{
   892  			x86_64_movss_regp_reg(inst, $1, $2);
   893  		}
   894  		else
   895  		{
   896  			x86_64_movss_membase_reg(inst, $1, $3, $2);
   897  		}
   898  	}
   899  
   900  JIT_OP_STORE_RELATIVE_FLOAT64: ternary
   901  	[reg, imm, imm] -> {
        		/*
        		 * $2 is used as the address of the float64 constant, which is stored as
        		 * two 32-bit words at [$1 + $3] and [$1 + $3 + 4]. Unlike the sibling
        		 * rules, this clause has no special case for a zero offset and casts
        		 * through (int *) rather than (jit_int *).
        		 */
   902  		x86_64_mov_membase_imm_size(inst, $1, $3, ((int *)($2))[0], 4);
   903  		x86_64_mov_membase_imm_size(inst, $1, $3 + 4, ((int *)($2))[1], 4);
   904  	}
   905  	[reg, xreg, imm] -> {
        		/* Store the float64 in SSE register $2 with movsd at [$1] when $3 is zero, otherwise at [$1 + $3]. */
   906  		if($3 == 0)
   907  		{
   908  			x86_64_movsd_regp_reg(inst, $1, $2);
   909  		}
   910  		else
   911  		{
   912  			x86_64_movsd_membase_reg(inst, $1, $3, $2);
   913  		}
   914  	}
   915  
   916  JIT_OP_STORE_RELATIVE_STRUCT: ternary
   917  	[reg, frame, imm, scratch reg, scratch xreg,
   918  		if("jit_type_get_size(jit_value_get_type(insn->value1)) <= _JIT_MAX_MEMCPY_INLINE")] -> {
        		/*
        		 * Selected only when the size of insn->value1's type is at most
        		 * _JIT_MAX_MEMCPY_INLINE: copy from the frame slot RBP + $2 to
        		 * [$1 + $3] through small_struct_copy, using scratch registers $4 and $5.
        		 */
   919  		inst = small_struct_copy(gen, inst, $1, $3, X86_64_RBP, $2,
   920  								 jit_value_get_type(insn->value1), $4, $5);
   921  	}
   922  	[reg, frame, imm, clobber(creg), clobber(xreg)] -> {
        		/* Fallback for all other sizes: copy the type's size in bytes through memory_copy. */
   923  		inst = memory_copy(gen, inst, $1, $3, X86_64_RBP, $2,
   924  				   jit_type_get_size(jit_value_get_type(insn->value1)));
   925  	}
   926  
   927  JIT_OP_ADD_RELATIVE:
   928  	[reg, immzero] -> {
        		/* Adding a zero offset: the body is intentionally empty and emits nothing. */
   929  	}
   930  	[=reg, reg, imms32] -> {
        		/* lea computes $2 + $3 into $1 as an 8-byte address. */
   931  		x86_64_lea_membase_size(inst, $1, $2, $3, 8);
   932  	}
   933  
   934  /*
   935   * Array element loads and stores.
   936   */
   937  
   938  JIT_OP_LOAD_ELEMENT_SBYTE:
   939  	[=reg, reg, reg] -> {
        		/*
        		 * Sign-extending byte load into $1 (4-byte destination) using the
        		 * indexed emitter: base $2, displacement 0, index $3, shift argument 0.
        		 * (Argument roles read off the macro name and call pattern - confirm
        		 * against the emitter header.)
        		 */
   940  		x86_64_movsx8_reg_memindex_size(inst, $1, $2, 0, $3, 0, 4);
   941  	}
   942  
   943  JIT_OP_LOAD_ELEMENT_UBYTE:
   944  	[=reg, reg, reg] -> {
        		/* Zero-extending byte load into $1 (4-byte destination): base $2, displacement 0, index $3, shift argument 0. */
   945  		x86_64_movzx8_reg_memindex_size(inst, $1, $2, 0, $3, 0, 4);
   946  	}
   947  
   948  JIT_OP_LOAD_ELEMENT_SHORT:
   949  	[=reg, reg, reg] -> {
        		/* Sign-extending 16-bit load into $1 (4-byte destination): base $2, displacement 0, index $3, shift argument 1. */
   950  		x86_64_movsx16_reg_memindex_size(inst, $1, $2, 0, $3, 1, 4);
   951  	}
   952  
   953  JIT_OP_LOAD_ELEMENT_USHORT:
   954  	[=reg, reg, reg] -> {
        		/* Zero-extending 16-bit load into $1 (4-byte destination): base $2, displacement 0, index $3, shift argument 1. */
   955  		x86_64_movzx16_reg_memindex_size(inst, $1, $2, 0, $3, 1, 4);
   956  	}
   957  
   958  JIT_OP_LOAD_ELEMENT_INT:
   959  	[=reg, reg, reg] -> {
        		/* 4-byte load into $1: base $2, displacement 0, index $3, shift argument 2. */
   960  		x86_64_mov_reg_memindex_size(inst, $1, $2, 0, $3, 2, 4);
   961  	}
   962  
   963  JIT_OP_LOAD_ELEMENT_LONG:
   964  	[=reg, reg, reg] -> {
        		/* 8-byte load into $1: base $2, displacement 0, index $3, shift argument 3. */
   965  		x86_64_mov_reg_memindex_size(inst, $1, $2, 0, $3, 3, 8);
   966  	}
   967  
   968  JIT_OP_LOAD_ELEMENT_FLOAT32:
   969  	[=xreg, reg, reg] -> {
        		/* movss load into SSE register $1: base $2, displacement 0, index $3, shift argument 2. */
   970  		x86_64_movss_reg_memindex(inst, $1, $2, 0, $3, 2);
   971  	}
   972  
   973  JIT_OP_LOAD_ELEMENT_FLOAT64:
   974  	[=xreg, reg, reg] -> {
        		/* movsd load into SSE register $1: base $2, displacement 0, index $3, shift argument 3. */
   975  		x86_64_movsd_reg_memindex(inst, $1, $2, 0, $3, 3);
   976  	}
   977  
   978  JIT_OP_STORE_ELEMENT_BYTE: ternary
   979  	[reg, reg, reg] -> {
        		/*
        		 * Store one byte from $3 using the indexed emitter: base $1,
        		 * displacement 0, index $2, shift argument 0. (Argument roles read off
        		 * the macro name and call pattern - confirm against the emitter header.)
        		 */
   980  		x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 0, $3, 1);
   981  	}
   982  
   983  JIT_OP_STORE_ELEMENT_SHORT: ternary
   984  	[reg, reg, reg] -> {
        		/* Store two bytes from $3: base $1, displacement 0, index $2, shift argument 1. */
   985  		x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 1, $3, 2);
   986  	}
   987  
   988  JIT_OP_STORE_ELEMENT_INT: ternary
   989  	[reg, reg, reg] -> {
        		/* Store four bytes from $3: base $1, displacement 0, index $2, shift argument 2. */
   990  		x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 2, $3, 4);
   991  	}
   992  
   993  JIT_OP_STORE_ELEMENT_LONG: ternary
   994  	[reg, reg, imm] -> {
        		/*
        		 * Constant value: when $3 lies inside the jit_min_int..jit_max_int range
        		 * a single 8-byte store-immediate is emitted. Otherwise the constant is
        		 * written as two 32-bit words, long_ptr[0] at displacement 0 and
        		 * long_ptr[1] at displacement 4, both with index $2 and shift argument 3.
        		 */
   995  		if($3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int)
   996  		{
   997  			x86_64_mov_memindex_imm_size(inst, $1, 0, $2, 3, $3, 8);
   998  		}
   999  		else
  1000  		{
  1001  			jit_int *long_ptr = (jit_int *)(&($3));
  1002  
  1003  			x86_64_mov_memindex_imm_size(inst, $1, 0, $2, 3, long_ptr[0], 4);
  1004  			x86_64_mov_memindex_imm_size(inst, $1, 4, $2, 3, long_ptr[1], 4);
  1005  		}
  1006  	}
  1007  	[reg, reg, reg] -> {
        		/* Register value: store the 8-byte register $3 with base $1, displacement 0, index $2, shift argument 3. */
  1008  		x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 3, $3, 8);
  1009  	}
  1010  
  1011  JIT_OP_STORE_ELEMENT_FLOAT32: ternary
  1012  	[reg, reg, xreg] -> {
        		/* movss store of SSE register $3: base $1, displacement 0, index $2, shift argument 2. */
  1013  		x86_64_movss_memindex_reg(inst, $1, 0, $2, 2, $3);
  1014  	}
  1015  
  1016  JIT_OP_STORE_ELEMENT_FLOAT64: ternary
  1017  	[reg, reg, xreg] -> {
        		/* movsd store of xmm register $3; base $1, index $2, shift 3 */
  1018  		x86_64_movsd_memindex_reg(inst, $1, 0, $2, 3, $3);
  1019  	}
  1020  
  1021  /*
  1022   * Arithmetic opcodes.
  1023   */
  1024  
  1025  /*
  1026   * 4 byte integer versions
  1027   */
  1028  
  1029  JIT_OP_IADD: commutative
  1030  	[reg, immzero] -> {
        		/* Adding zero: no instruction is emitted */
  1031  	}
  1032  	[=reg, reg, imms32] -> {
  1033  		if($1 != $2)
  1034  		{
        			/* Destination differs from the source register: lea forms
        			   $2 + $3 directly in $1 */
  1035  			x86_64_lea_membase_size(inst, $1, $2, $3, 4);
  1036  		}
  1037  		else if($3 == 1)
  1038  		{
        			/* In-place +1 uses inc */
  1039  			x86_64_inc_reg_size(inst, $1, 4);
  1040  		}
  1041  		else if($3 == -1)
  1042  		{
        			/* In-place -1 uses dec */
  1043  			x86_64_dec_reg_size(inst, $1, 4);
  1044  		}
  1045  		else
  1046  		{
  1047  			x86_64_add_reg_imm_size(inst, $1, $3, 4);
  1048  		}
  1049  	}
  1050  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1051  		x86_64_add_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1052  	}
  1053  	[reg, reg] -> {
  1054  		x86_64_add_reg_reg_size(inst, $1, $2, 4);
  1055  	}
  1056  
  1057  JIT_OP_ISUB:
  1058  	[reg, immzero] -> {
        		/* Subtracting zero: no instruction is emitted */
  1059  	}
  1060  	[=reg, reg, imms32] -> {
  1061  		if($1 != $2)
  1062  		{
        			/* Destination differs from the source register: lea forms
        			   $2 + (-$3) directly in $1 */
  1063  			x86_64_lea_membase_size(inst, $1, $2, -$3, 4);
  1064  		}
  1065  		else if($3 == 1)
  1066  		{
        			/* In-place -1 uses dec */
  1067  			x86_64_dec_reg_size(inst, $1, 4);
  1068  		}
  1069  		else if($3 == -1)
  1070  		{
        			/* Subtracting -1 is an increment */
  1071  			x86_64_inc_reg_size(inst, $1, 4);
  1072  		}
  1073  		else
  1074  		{
  1075  			x86_64_sub_reg_imm_size(inst, $1, $3, 4);
  1076  		}
  1077  	}
  1078  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1079  		x86_64_sub_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1080  	}
  1081  	[reg, reg] -> {
  1082  		x86_64_sub_reg_reg_size(inst, $1, $2, 4);
  1083  	}
  1084  
  1085  JIT_OP_INEG:
  1086  	[reg] -> {
        		/* In-place 4-byte negation of $1 */
  1087  		x86_64_neg_reg_size(inst, $1, 4);
  1088  	}
  1089  
  1090  JIT_OP_IMUL: commutative
  1091  	[reg, immzero] -> {
        		/* x * 0 == 0: just zero the register */
  1092  		x86_64_clear_reg(inst, $1);
  1093  	}
  1094  	[reg, imm, if("$2 == -1")] -> {
        		/* x * -1 == -x */
  1095  		x86_64_neg_reg_size(inst, $1, 4);
  1096  	}
  1097  	[reg, imm, if("$2 == 1")] -> {
        		/* x * 1 == x: nothing to emit */
  1098  	}
  1099  	[reg, imm, if("$2 == 2")] -> {
        		/* x * 2 == x + x */
  1100  		x86_64_add_reg_reg_size(inst, $1, $1, 4);
  1101  	}
  1102  	[reg, imm, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1103  		/* x & (x - 1) is equal to zero if x is a power of 2  */
        		/* Count how often the constant can be halved to obtain the
        		   shift count, then multiply with a left shift */
  1104  		jit_nuint shift, value = $2 >> 1;
  1105  		for(shift = 0; value; value >>= 1)
  1106  		{
  1107  		    ++shift;
  1108  		}
  1109  		x86_64_shl_reg_imm_size(inst, $1, shift, 4);
  1110  	}
  1111  	[reg, imm] -> {
        		/* General constant: three-operand imul with $1 as source
        		   and destination */
  1112  		x86_64_imul_reg_reg_imm_size(inst, $1, $1, $2, 4);
  1113  	}
  1114  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1115  		x86_64_imul_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1116  	}
  1117  	[reg, reg] -> {
  1118  		x86_64_imul_reg_reg_size(inst, $1, $2, 4);
  1119  	}
  1120  
  1121  JIT_OP_IDIV: more_space
  1122  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1123  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1124  	}
  1125  	[reg, imm, if("$2 == 1")] -> {
        		/* x / 1 == x: nothing to emit */
  1126  	}
  1127  	[reg, imm, if("$2 == -1")] -> {
  1128  		/* Dividing by -1 gives an exception if the argument
  1129  		   is minint, or simply negates for other values */
  1130  		jit_int min_int = jit_min_int;
  1131  		unsigned char *patch;
  1132  		x86_64_cmp_reg_imm_size(inst, $1, min_int, 4);
        		/* Short forward branch over the throw; its target is filled
        		   in by x86_patch once the throw code has been emitted */
  1133  		patch = inst;
  1134  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1135  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1136  		x86_patch(patch, inst);
  1137  		x86_64_neg_reg_size(inst, $1, 4);
  1138  	}
  1139  	[reg, imm, scratch reg, if("$2 == 2")] -> {
  1140  		/* move the value to be divided to the temporary */
  1141  		x86_64_mov_reg_reg_size(inst, $3, $1, 4);
  1142  		/* shift the temporary right by 31 bits */
  1143  		/* The result is 1 for negative values and 0 for zero or */
  1144  		/* positive values. (corrective value for negatives) */
  1145  		x86_64_shr_reg_imm_size(inst, $3, 0x1f, 4);
  1146  		/* Add the corrective value to the dividend */
  1147  		x86_64_add_reg_reg_size(inst, $1, $3, 4);
  1148  		/* and do the right shift */
  1149  		x86_64_sar_reg_imm_size(inst, $1, 1, 4);
  1150  	}
  1151  	[reg, imm, scratch reg, if("($2 > 0) && (((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1152  		/* x & (x - 1) is equal to zero if x is a power of 2  */
  1153  		jit_nuint shift, corr, value = $2 >> 1;
  1154  		for(shift = 0; value; value >>= 1)
  1155  		{
  1156  		    ++shift;
  1157  		}
        		/* $3 = $1 + (divisor - 1); it replaces $1 only when $1 is
        		   negative (sign flag set by the test), so that the
        		   arithmetic shift rounds towards zero */
  1158  		corr = $2 - 1;
  1159  		x86_64_lea_membase_size(inst, $3, $1, corr, 4);
  1160  		x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1161  		x86_64_cmov_reg_reg_size(inst, X86_CC_S, $1, $3, 1, 4);
  1162  		x86_64_sar_reg_imm_size(inst, $1, shift, 4);
  1163  	}
  1164  	[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: load it into the scratch register
        		   $3, sign-extend the dividend with cdq and divide */
  1165  		x86_64_mov_reg_imm_size(inst, $3, $2, 4);
  1166  		x86_64_cdq(inst);
  1167  		x86_64_idiv_reg_size(inst, $3, 4);
  1168  	}
  1169  	[reg("rax"), dreg, scratch reg("rdx")] -> {
  1170  		jit_int min_int = jit_min_int;
  1171  		unsigned char *patch, *patch2;
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1172  #ifndef JIT_USE_SIGNALS
  1173  		x86_64_test_reg_reg_size(inst, $2, $2, 4);
  1174  		patch = inst;
  1175  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1176  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1177  		x86_patch(patch, inst);
  1178  #endif
        		/* Raise JIT_RESULT_ARITHMETIC only for min_int / -1; both
        		   branches skip the throw and land on the cdq below */
  1179  		x86_64_cmp_reg_imm_size(inst, $2, -1, 4);
  1180  		patch = inst;
  1181  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1182  		x86_64_cmp_reg_imm_size(inst, $1, min_int, 4);
  1183  		patch2 = inst;
  1184  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1185  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1186  		x86_patch(patch, inst);
  1187  		x86_patch(patch2, inst);
  1188  		x86_64_cdq(inst);
  1189  		x86_64_idiv_reg_size(inst, $2, 4);
  1190  	}
  1191  
  1192  JIT_OP_IDIV_UN: more_space
  1193  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1194  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1195  	}
  1196  	[reg, imm, if("$2 == 1")] -> {
        		/* x / 1 == x: nothing to emit */
  1197  	}
  1198  	[reg, imm, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1199  		/* x & (x - 1) is equal to zero if x is a power of 2  */
        		/* Unsigned division by a power of 2 is a logical right shift */
  1200  		jit_nuint shift, value = $2 >> 1;
  1201  		for(shift = 0; value; value >>= 1)
  1202  		{
  1203  		    ++shift;
  1204  		}
  1205  		x86_64_shr_reg_imm_size(inst, $1, shift, 4);
  1206  	}
  1207  	[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: load it into the scratch register
        		   $3 and clear RDX (upper half of the dividend) before div */
  1208  		x86_64_mov_reg_imm_size(inst, $3, $2, 4);
  1209  		x86_64_clear_reg(inst, X86_64_RDX);
  1210  		x86_64_div_reg_size(inst, $3, 4);
  1211  	}
  1212  	[reg("rax"), dreg, scratch reg("rdx")] -> {
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1213  #ifndef JIT_USE_SIGNALS
  1214  		unsigned char *patch;
  1215  		x86_64_test_reg_reg_size(inst, $2, $2, 4);
  1216  		patch = inst;
  1217  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1218  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1219  		x86_patch(patch, inst);
  1220  #endif
  1221  		x86_64_clear_reg(inst, X86_64_RDX);
  1222  		x86_64_div_reg_size(inst, $2, 4);
  1223  	}
  1224  
  1225  JIT_OP_IREM: more_space
  1226  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1227  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1228  	}
  1229  	[reg, imm, if("$2 == 1")] -> {
        		/* x % 1 == 0 */
  1230  		x86_64_clear_reg(inst, $1);
  1231  	}
  1232  	[reg, imm, if("$2 == -1")] -> {
  1233  		/* Dividing by -1 gives an exception if the argument
  1234  		   is minint, or simply gives a remainder of zero */
  1235  		jit_int min_int = jit_min_int;
  1236  		unsigned char *patch;
  1237  		x86_64_cmp_reg_imm_size(inst, $1, min_int, 4);
        		/* Short forward branch over the throw, patched afterwards */
  1238  		patch = inst;
  1239  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1240  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1241  		x86_patch(patch, inst);
  1242  		x86_64_clear_reg(inst, $1);
  1243  	}
  1244  	[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: $4 is the scratch register that
        		   receives it; the result operand $1 is pinned to rdx */
  1245  		x86_64_mov_reg_imm_size(inst, $4, $3, 4);
  1246  		x86_64_cdq(inst);
  1247  		x86_64_idiv_reg_size(inst, $4, 4);
  1248  	}
  1249  	[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
  1250  		jit_int min_int = jit_min_int;
  1251  		unsigned char *patch, *patch2;
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1252  #ifndef JIT_USE_SIGNALS
  1253  		x86_64_test_reg_reg_size(inst, $3, $3, 4);
  1254  		patch = inst;
  1255  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1256  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1257  		x86_patch(patch, inst);
  1258  #endif
        		/* Raise JIT_RESULT_ARITHMETIC only for min_int % -1; both
        		   branches skip the throw and land on the cdq below */
  1259  		x86_64_cmp_reg_imm_size(inst, $3, -1, 4);
  1260  		patch = inst;
  1261  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1262  		x86_64_cmp_reg_imm_size(inst, $2, min_int, 4);
  1263  		patch2 = inst;
  1264  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1265  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1266  		x86_patch(patch, inst);
  1267  		x86_patch(patch2, inst);
  1268  		x86_64_cdq(inst);
  1269  		x86_64_idiv_reg_size(inst, $3, 4);
  1270  	}
  1271  
  1272  JIT_OP_IREM_UN: more_space
  1273  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1274  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1275  	}
  1276  	[reg, imm, if("$2 == 1")] -> {
        		/* x % 1 == 0 */
  1277  		x86_64_clear_reg(inst, $1);
  1278  	}
  1279  	[reg, imm, if("($2 & ($2 - 1)) == 0")] -> {
  1280  		/* x & (x - 1) is equal to zero if x is a power of 2  */
        		/* The unsigned remainder is then the value masked with
        		   divisor - 1 */
  1281  		x86_64_and_reg_imm_size(inst, $1, $2 - 1, 4);
  1282  	}
  1283  	[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: load it into the scratch register
        		   $4 and clear RDX before the unsigned divide */
  1284  		x86_64_mov_reg_imm_size(inst, $4, $3, 4);
  1285  		x86_64_clear_reg(inst, X86_64_RDX);
  1286  		x86_64_div_reg_size(inst, $4, 4);
  1287  	}
  1288  	[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1289  #ifndef JIT_USE_SIGNALS
  1290  		unsigned char *patch;
  1291  		x86_64_test_reg_reg_size(inst, $3, $3, 4);
  1292  		patch = inst;
  1293  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1294  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1295  		x86_patch(patch, inst);
  1296  #endif
  1297  		x86_64_clear_reg(inst, X86_64_RDX);
  1298  		x86_64_div_reg_size(inst, $3, 4);
  1299  	}
  1300  
  1301  /*
  1302   * 8 byte integer versions
  1303   */
  1304  
  1305  JIT_OP_LADD: commutative
  1306  	[reg, immzero] -> {
        		/* Adding zero: no instruction is emitted */
  1307  	}
  1308  	[=reg, reg, imms32] -> {
  1309  		if($1 != $2)
  1310  		{
        			/* Destination differs from the source register: lea forms
        			   $2 + $3 directly in $1 */
  1311  			x86_64_lea_membase_size(inst, $1, $2, $3, 8);
  1312  		}
  1313  		else if($3 == 1)
  1314  		{
        			/* In-place +1 uses inc */
  1315  			x86_64_inc_reg_size(inst, $1, 8);
  1316  		}
  1317  		else if($3 == -1)
  1318  		{
        			/* In-place -1 uses dec */
  1319  			x86_64_dec_reg_size(inst, $1, 8);
  1320  		}
  1321  		else
  1322  		{
  1323  			x86_64_add_reg_imm_size(inst, $1, $3, 8);
  1324  		}
  1325  	}
  1326  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1327  		x86_64_add_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  1328  	}
  1329  	[reg, reg] -> {
  1330  		x86_64_add_reg_reg_size(inst, $1, $2, 8);
  1331  	}
  1332  
  1333  JIT_OP_LSUB:
  1334  	[reg, immzero] -> {
        		/* Subtracting zero: no instruction is emitted */
  1335  	}
  1336  	[=reg, reg, imms32] -> {
  1337  		if($1 != $2)
  1338  		{
        			/* Destination differs from the source register: lea forms
        			   $2 + (-$3) directly in $1 */
  1339  			x86_64_lea_membase_size(inst, $1, $2, -$3, 8);
  1340  		}
  1341  		else if($3 == 1)
  1342  		{
        			/* In-place -1 uses dec */
  1343  			x86_64_dec_reg_size(inst, $1, 8);
  1344  		}
  1345  		else if($3 == -1)
  1346  		{
        			/* Subtracting -1 is an increment */
  1347  			x86_64_inc_reg_size(inst, $1, 8);
  1348  		}
  1349  		else
  1350  		{
  1351  			x86_64_sub_reg_imm_size(inst, $1, $3, 8);
  1352  		}
  1353  	}
  1354  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1355  		x86_64_sub_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  1356  	}
  1357  	[reg, reg] -> {
  1358  		x86_64_sub_reg_reg_size(inst, $1, $2, 8);
  1359  	}
  1360  
  1361  JIT_OP_LNEG:
  1362  	[reg] -> {
        		/* In-place 8-byte negation of $1 */
  1363  		x86_64_neg_reg_size(inst, $1, 8);
  1364  	}
  1365  
  1366  JIT_OP_LMUL: commutative
  1367  	[reg, immzero] -> {
        		/* x * 0 == 0: just zero the register */
  1368  		x86_64_clear_reg(inst, $1);
  1369  	}
  1370  	[reg, imm, if("$2 == -1")] -> {
        		/* x * -1 == -x */
  1371  		x86_64_neg_reg_size(inst, $1, 8);
  1372  	}
  1373  	[reg, imm, if("$2 == 1")] -> {
        		/* x * 1 == x: nothing to emit */
  1374  	}
  1375  	[reg, imm, if("$2 == 2")] -> {
        		/* x * 2 == x + x */
  1376  		x86_64_add_reg_reg_size(inst, $1, $1, 8);
  1377  	}
  1378  	[reg, imm, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1379  		/* x & (x - 1) is equal to zero if x is a power of 2  */
        		/* Derive the shift count from the unsigned value of the
        		   constant: shifting the signed value right would replicate
        		   the sign bit of 2^63 and produce a count of 64 instead
        		   of 63. */
  1380  		jit_nuint shift, value = ((jit_nuint)$2) >> 1;
  1381  		for(shift = 0; value; value >>= 1)
  1382  		{
  1383  		    ++shift;
  1384  		}
  1385  		x86_64_shl_reg_imm_size(inst, $1, shift, 8);
  1386  	}
  1387  	[reg, imms32] -> {
        		/* General constant that fits the imms32 pattern: three-operand
        		   imul with $1 as source and destination */
  1388  		x86_64_imul_reg_reg_imm_size(inst, $1, $1, $2, 8);
  1389  	}
  1390  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1391  		x86_64_imul_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  1392  	}
  1393  	[reg, reg] -> {
  1394  		x86_64_imul_reg_reg_size(inst, $1, $2, 8);
  1395  	}
  1396  
  1397  JIT_OP_LDIV: more_space
  1398  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1399  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1400  	}
  1401  	[reg, imm, if("$2 == 1")] -> {
        		/* x / 1 == x: nothing to emit */
  1402  	}
  1403  	[reg, imm, scratch reg, if("$2 == -1")] -> {
  1404  		/* Dividing by -1 gives an exception if the argument
  1405  		   is minint, or simply negates for other values */
  1406  		jit_long min_long = jit_min_long;
  1407  		unsigned char *patch;
        		/* The 64-bit constant is loaded into the scratch register and
        		   compared register against register */
  1408  		x86_64_mov_reg_imm_size(inst, $3, min_long, 8);
  1409  		x86_64_cmp_reg_reg_size(inst, $1, $3, 8);
        		/* Short forward branch over the throw, patched afterwards */
  1410  		patch = inst;
  1411  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1412  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1413  		x86_patch(patch, inst);
  1414  		x86_64_neg_reg_size(inst, $1, 8);
  1415  	}
  1416  	[reg, imm, scratch reg, if("$2 == 2")] -> {
  1417  		/* move the value to be divided to the temporary */
  1418  		x86_64_mov_reg_reg_size(inst, $3, $1, 8);
  1419  		/* shift the temporary right by 63 bits */
  1420  		/* The result is 1 for negative values and 0 for zero or */
  1421  		/* positive values. (corrective value for negatives) */
  1422  		x86_64_shr_reg_imm_size(inst, $3, 0x3f, 8);
  1423  		/* Add the corrective value to the dividend */
  1424  		x86_64_add_reg_reg_size(inst, $1, $3, 8);
  1425  		/* and do the right shift */
  1426  		x86_64_sar_reg_imm_size(inst, $1, 1, 8);
  1427  	}
  1428  	[reg, imm, scratch reg, if("($2 > 0) && (((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1429  		/* x & (x - 1) is equal to zero if x is a power of 2  */
  1430  		jit_nuint shift, value = $2 >> 1;
  1431  		for(shift = 0; value; value >>= 1)
  1432  		{
  1433  		    ++shift;
  1434  		}
        		/* $3 = $1 + (divisor - 1); it replaces $1 only when $1 is
        		   negative, so that the arithmetic shift rounds towards zero */
  1435  		if((jit_nuint)$2 <= (jit_nuint)jit_max_uint)
  1436  		{
        			/* The correction is small enough to be used as the
        			   displacement of a lea */
  1437  			jit_nuint corr = ($2 - 1);
  1438  
  1439  			x86_64_lea_membase_size(inst, $3, $1, corr, 8);
  1440  			x86_64_test_reg_reg_size(inst, $1, $1, 8);
  1441  		}
  1442  		else
  1443  		{
        			/* Larger correction: materialize it in $3 first and add
        			   $1 with a register-indexed lea. The lea follows the
        			   test and precedes the cmov that consumes its flags. */
  1444  			jit_nuint corr = ($2 - 1);
  1445  
  1446  			if(corr <= (jit_nuint)jit_max_uint)
  1447  			{
  1448  				x86_64_mov_reg_imm_size(inst, $3, corr, 4);
  1449  			}
  1450  			else
  1451  			{
  1452  				x86_64_mov_reg_imm_size(inst, $3, corr, 8);
  1453  			}
  1454  			x86_64_test_reg_reg_size(inst, $1, $1, 8);
  1455  			x86_64_lea_memindex_size(inst, $3, $1, 0, $3, 0, 8);
  1456  		}
  1457  		x86_64_cmov_reg_reg_size(inst, X86_CC_S, $1, $3, 1, 8);
  1458  		x86_64_sar_reg_imm_size(inst, $1, shift, 8);
  1459  	}
  1460  	[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: load it into the scratch register
        		   $3, sign-extend the dividend with cqo and divide */
  1461  		x86_64_mov_reg_imm_size(inst, $3, $2, 8);
  1462  		x86_64_cqo(inst);
  1463  		x86_64_idiv_reg_size(inst, $3, 8);
  1464  	}
  1465  	[reg("rax"), dreg, scratch reg("rdx")] -> {
  1466  		jit_long min_long = jit_min_long;
  1467  		unsigned char *patch, *patch2;
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined. It uses test, like the
        		   other division and remainder rules, which sets the flags
        		   without writing the divisor register. */
  1468  #ifndef JIT_USE_SIGNALS
  1469  		x86_64_test_reg_reg_size(inst, $2, $2, 8);
  1470  		patch = inst;
  1471  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1472  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1473  		x86_patch(patch, inst);
  1474  #endif
        		/* Raise JIT_RESULT_ARITHMETIC only for min_long / -1; the
        		   scratch register $3 holds min_long for the comparison and
        		   both branches skip the throw and land on the cqo below */
  1475  		x86_64_cmp_reg_imm_size(inst, $2, -1, 8);
  1476  		patch = inst;
  1477  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1478  		x86_64_mov_reg_imm_size(inst, $3, min_long, 8);
  1479  		x86_64_cmp_reg_reg_size(inst, $1, $3, 8);
  1480  		patch2 = inst;
  1481  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1482  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1483  		x86_patch(patch, inst);
  1484  		x86_patch(patch2, inst);
  1485  		x86_64_cqo(inst);
  1486  		x86_64_idiv_reg_size(inst, $2, 8);
  1487  	}
  1488  
  1489  JIT_OP_LDIV_UN: more_space
  1490  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1491  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1492  	}
  1493  	[reg, imm, if("$2 == 1")] -> {
        		/* x / 1 == x: nothing to emit */
  1494  	}
  1495  	[reg, imm, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1496  		/* x & (x - 1) is equal to zero if x is a power of 2  */
        		/* Unsigned division by a power of 2 is a logical right shift.
        		   The shift count is derived from the unsigned value of the
        		   constant: shifting the signed value right would replicate
        		   the sign bit of 2^63 and produce a count of 64 instead
        		   of 63. */
  1497  		jit_nuint shift, value = ((jit_nuint)$2) >> 1;
  1498  		for(shift = 0; value; value >>= 1)
  1499  		{
  1500  		    ++shift;
  1501  		}
  1502  		x86_64_shr_reg_imm_size(inst, $1, shift, 8);
  1503  	}
  1504  	[reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: load it into the scratch register
        		   $3 and clear RDX (upper half of the dividend) before div */
  1505  		x86_64_mov_reg_imm_size(inst, $3, $2, 8);
  1506  		x86_64_clear_reg(inst, X86_64_RDX);
  1507  		x86_64_div_reg_size(inst, $3, 8);
  1508  	}
  1509  	[reg("rax"), dreg, scratch reg("rdx")] -> {
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1510  #ifndef JIT_USE_SIGNALS
  1511  		unsigned char *patch;
  1512  		x86_64_test_reg_reg_size(inst, $2, $2, 8);
  1513  		patch = inst;
  1514  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1515  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1516  		x86_patch(patch, inst);
  1517  #endif
  1518  		x86_64_clear_reg(inst, X86_64_RDX);
  1519  		x86_64_div_reg_size(inst, $2, 8);
  1520  	}
  1521  
  1522  JIT_OP_LREM: more_space
  1523  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1524  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1525  	}
  1526  	[reg, imm, if("$2 == 1")] -> {
        		/* x % 1 == 0 */
  1527  		x86_64_clear_reg(inst, $1);
  1528  	}
  1529  	[reg, imm, scratch reg, if("$2 == -1")] -> {
  1530  		/* Dividing by -1 gives an exception if the argument
  1531  		   is minint, or simply gives a remainder of zero */
  1532  		jit_long min_long = jit_min_long;
  1533  		unsigned char *patch;
        		/* jit_min_long does not fit the sign-extended 32-bit immediate
        		   of a cmp instruction, so it is loaded into the scratch
        		   register $3 and compared register against register (the
        		   same sequence JIT_OP_LDIV uses for its -1 case) */
  1534  		x86_64_mov_reg_imm_size(inst, $3, min_long, 8);
        		x86_64_cmp_reg_reg_size(inst, $1, $3, 8);
        		/* Short forward branch over the throw, patched afterwards */
  1535  		patch = inst;
  1536  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1537  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1538  		x86_patch(patch, inst);
  1539  		x86_64_clear_reg(inst, $1);
  1540  	}
  1541  	[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: $4 is the scratch register that
        		   receives it; the result operand $1 is pinned to rdx */
  1542  		x86_64_mov_reg_imm_size(inst, $4, $3, 8);
  1543  		x86_64_cqo(inst);
  1544  		x86_64_idiv_reg_size(inst, $4, 8);
  1545  	}
  1546  	[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
  1547  		jit_long min_long = jit_min_long;
  1548  		unsigned char *patch, *patch2;
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1549  #ifndef JIT_USE_SIGNALS
  1550  		x86_64_test_reg_reg_size(inst, $3, $3, 8);
  1551  		patch = inst;
  1552  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1553  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1554  		x86_patch(patch, inst);
  1555  #endif
        		/* Raise JIT_RESULT_ARITHMETIC only for min_long % -1. The
        		   result register $1 temporarily holds min_long for the
        		   comparison; cqo overwrites it afterwards. */
  1556  		x86_64_mov_reg_imm_size(inst, $1, min_long, 8);
  1557  		x86_64_cmp_reg_imm_size(inst, $3, -1, 8);
  1558  		patch = inst;
  1559  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1560  		x86_64_cmp_reg_reg_size(inst, $2, $1, 8);
  1561  		patch2 = inst;
  1562  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1563  		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
  1564  		x86_patch(patch, inst);
  1565  		x86_patch(patch2, inst);
  1566  		x86_64_cqo(inst);
  1567  		x86_64_idiv_reg_size(inst, $3, 8);
  1568  	}
  1569  
  1570  JIT_OP_LREM_UN: more_space
  1571  	[any, immzero] -> {
        		/* Constant zero divisor: unconditionally raise the exception */
  1572  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1573  	}
  1574  	[reg, imm, if("$2 == 1")] -> {
        		/* x % 1 == 0 */
  1575  		x86_64_clear_reg(inst, $1);
  1576  	}
  1577  	[reg, imm, scratch reg, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
  1578  		/* x & (x - 1) is equal to zero if x is a power of 2  */
        		/* The unsigned remainder is then the value masked with
        		   divisor - 1 */
  1579  		if(($2 >= jit_min_int) && ($2 <= jit_max_int))
  1580  		{
  1581  			x86_64_and_reg_imm_size(inst, $1, $2 - 1, 8);
  1582  		}
  1583  		else
  1584  		{
        			/* The mask does not fit an immediate operand, so it is
        			   loaded into the scratch register $3. It is computed in
        			   unsigned arithmetic because the constant may be 2^63,
        			   for which a signed "- 1" would overflow. */
  1585  			jit_long temp = (jit_long)(((jit_nuint)$2) - 1);
  1586  
  1587  			x86_64_mov_reg_imm_size(inst, $3, temp, 8);
  1588  			x86_64_and_reg_reg_size(inst, $1, $3, 8);
  1589  		}
  1590  	}
  1591  	[=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
        		/* General constant divisor: load it into the scratch register
        		   $4 and clear RDX before the unsigned divide */
  1592  		x86_64_mov_reg_imm_size(inst, $4, $3, 8);
  1593  		x86_64_clear_reg(inst, X86_64_RDX);
  1594  		x86_64_div_reg_size(inst, $4, 8);
  1595  	}
  1596  	[=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
        		/* The explicit zero-divisor check is only emitted when
        		   JIT_USE_SIGNALS is not defined */
  1597  #ifndef JIT_USE_SIGNALS
  1598  		unsigned char *patch;
  1599  		x86_64_test_reg_reg_size(inst, $3, $3, 8);
  1600  		patch = inst;
  1601  		x86_branch8(inst, X86_CC_NE, 0, 0);
  1602  		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
  1603  		x86_patch(patch, inst);
  1604  #endif
  1605  		x86_64_clear_reg(inst, X86_64_RDX);
  1606  		x86_64_div_reg_size(inst, $3, 8);
  1607  	}
  1608  
  1609  /*
  1610   * single precision float versions
  1611   */
  1612  
  1613  JIT_OP_FADD:  commutative
  1614  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float32 constant */
  1615  		_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_ADD, $1, (jit_float32 *)$2);
  1616  	}
  1617  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1618  		x86_64_addss_reg_membase(inst, $1, X86_64_RBP, $2);
  1619  	}
  1620  	[xreg, xreg] -> {
  1621  		x86_64_addss_reg_reg(inst, $1, $2);
  1622  	}
  1623  
  1624  JIT_OP_FSUB:
  1625  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float32 constant */
  1626  		_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_SUB, $1, (jit_float32 *)$2);
  1627  	}
  1628  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2. This
        		   clause is listed ahead of the register-register one, as in
        		   JIT_OP_FADD, so that the unconditional register clause is
        		   the final fallback. */
  1629  		x86_64_subss_reg_membase(inst, $1, X86_64_RBP, $2);
  1630  	}
  1631  	[xreg, xreg] -> {
  1632  		x86_64_subss_reg_reg(inst, $1, $2);
  1633  	}
  1634  
  1635  JIT_OP_FMUL: commutative
  1636  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float32 constant */
  1637  		_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MUL, $1, (jit_float32 *)$2);
  1638  	}
  1639  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2. This
        		   clause is listed ahead of the register-register one, as in
        		   JIT_OP_FADD, so that the unconditional register clause is
        		   the final fallback. */
  1640  		x86_64_mulss_reg_membase(inst, $1, X86_64_RBP, $2);
  1641  	}
  1642  	[xreg, xreg] -> {
  1643  		x86_64_mulss_reg_reg(inst, $1, $2);
  1644  	}
  1645  
  1646  JIT_OP_FDIV:
  1647  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float32 constant */
  1648  		_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_DIV, $1, (jit_float32 *)$2);
  1649  	}
  1650  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2. This
        		   clause is listed ahead of the register-register one, as in
        		   JIT_OP_FADD, so that the unconditional register clause is
        		   the final fallback. */
  1651  		x86_64_divss_reg_membase(inst, $1, X86_64_RBP, $2);
  1652  	}
  1653  	[xreg, xreg] -> {
  1654  		x86_64_divss_reg_reg(inst, $1, $2);
  1655  	}
  1656  
  1657  JIT_OP_FABS:
  1658  	[xreg] -> {
  1659  		/* Simply clear the sign */
        		/* The mask keeps every bit except bit 31 in each of the four
        		   32-bit lanes; it is applied with the XMM_ANDP operation */
  1660  		jit_uint values[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
  1661  
  1662  		_jit_plops_reg_imm(gen, &inst, XMM_ANDP, $1, &(values[0]));
  1663  	}
  1664  
  1665  JIT_OP_FNEG:
  1666  	[xreg] -> {
  1667  		/* Simply toggle the sign */
        		/* The mask has only bit 31 set in each of the four 32-bit
        		   lanes; it is applied with the XMM_XORP operation */
  1668  		jit_uint values[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
  1669  
  1670  		_jit_plops_reg_imm(gen, &inst, XMM_XORP, $1, &(values[0]));
  1671  	}
  1672  
  1673  /*
  1674   * double precision float versions
  1675   */
  1676  
  1677  JIT_OP_DADD: commutative
  1678  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float64 constant */
  1679  		_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_ADD, $1, (jit_float64 *)$2);
  1680  	}
  1681  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1682  		x86_64_addsd_reg_membase(inst, $1, X86_64_RBP, $2);
  1683  	}
  1684  	[xreg, xreg] -> {
  1685  		x86_64_addsd_reg_reg(inst, $1, $2);
  1686  	}
  1687  
  1688  JIT_OP_DSUB:
  1689  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float64 constant */
  1690  		_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_SUB, $1, (jit_float64 *)$2);
  1691  	}
  1692  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1693  		x86_64_subsd_reg_membase(inst, $1, X86_64_RBP, $2);
  1694  	}
  1695  	[xreg, xreg] -> {
  1696  		x86_64_subsd_reg_reg(inst, $1, $2);
  1697  	}
  1698  
  1699  JIT_OP_DMUL: commutative
  1700  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float64 constant */
  1701  		_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MUL, $1, (jit_float64 *)$2);
  1702  	}
  1703  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1704  		x86_64_mulsd_reg_membase(inst, $1, X86_64_RBP, $2);
  1705  	}
  1706  	[xreg, xreg] -> {
  1707  		x86_64_mulsd_reg_reg(inst, $1, $2);
  1708  	}
  1709  
  1710  JIT_OP_DDIV:
  1711  	[xreg, imm] -> {
        		/* $2 is passed as a pointer to the float64 constant */
  1712  		_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_DIV, $1, (jit_float64 *)$2);
  1713  	}
  1714  	[xreg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1715  		x86_64_divsd_reg_membase(inst, $1, X86_64_RBP, $2);
  1716  	}
  1717  	[xreg, xreg] -> {
  1718  		x86_64_divsd_reg_reg(inst, $1, $2);
  1719  	}
  1720  
  1721  JIT_OP_DABS:
  1722  	[xreg] -> {
  1723  		/* Simply clear the sign */
        		/* The mask keeps every bit except bit 63 in each of the two
        		   64-bit lanes; it is applied with the XMM_ANDP operation */
  1724  		jit_ulong values[2] = {0x7fffffffffffffff, 0x7fffffffffffffff};
  1725  
  1726  		_jit_plopd_reg_imm(gen, &inst, XMM_ANDP, $1, &(values[0]));
  1727  	}
  1728  
  1729  JIT_OP_DNEG:
  1730  	[xreg] -> {
  1731  		/* Simply toggle the sign */
        		/* The mask has only bit 63 set in each of the two 64-bit
        		   lanes; it is applied with the XMM_XORP operation */
  1732  		jit_ulong values[2] = {0x8000000000000000, 0x8000000000000000};
  1733  
  1734  		_jit_plopd_reg_imm(gen, &inst, XMM_XORP, $1, &(values[0]));
  1735  	}
  1736  
  1737  /*
  1738   * native float versions
  1739   */
  1740  JIT_OP_NFABS: stack
  1741  	[freg] -> {
        		/* Emits fabs; the macro takes no register operand
        		   (presumably it acts on the top of the x87 stack) */
  1742  		x86_64_fabs(inst);
  1743  	}
  1744  
  1745  JIT_OP_NFNEG:  stack
  1746  	[freg] -> {
        		/* Emits fchs (change sign); the macro takes no register operand
        		   (presumably it acts on the top of the x87 stack) */
  1747  		x86_64_fchs(inst);
  1748  	}
  1749  
  1750  /*
  1751   * Bitwise opcodes.
  1752   */
  1753  
  1754  JIT_OP_IAND: commutative
  1755  	[reg, imm] -> {
        		/* 4-byte and with a constant */
  1756  		x86_64_and_reg_imm_size(inst, $1, $2, 4);
  1757  	}
  1758  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1759  		x86_64_and_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1760  	}
  1761  	[reg, reg] -> {
  1762  		x86_64_and_reg_reg_size(inst, $1, $2, 4);
  1763  	}
  1764  
  1765  JIT_OP_IOR: commutative
  1766  	[reg, imm] -> {
        		/* 4-byte or with a constant */
  1767  		x86_64_or_reg_imm_size(inst, $1, $2, 4);
  1768  	}
  1769  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1770  		x86_64_or_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1771  	}
  1772  	[reg, reg] -> {
  1773  		x86_64_or_reg_reg_size(inst, $1, $2, 4);
  1774  	}
  1775  
  1776  JIT_OP_IXOR: commutative
  1777  	[reg, imm] -> {
        		/* 4-byte xor with a constant */
  1778  		x86_64_xor_reg_imm_size(inst, $1, $2, 4);
  1779  	}
  1780  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1781  		x86_64_xor_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1782  	}
  1783  	[reg, reg] -> {
  1784  		x86_64_xor_reg_reg_size(inst, $1, $2, 4);
  1785  	}
  1786  
  1787  JIT_OP_INOT:
  1788  	[reg] -> {
        		/* In-place 4-byte bitwise complement of $1 */
  1789  		x86_64_not_reg_size(inst, $1, 4);
  1790  	}
  1791  
  1792  JIT_OP_ISHL:
  1793  	[reg, imm] -> {
        		/* Constant count, reduced to the range 0..31 */
  1794  		x86_64_shl_reg_imm_size(inst, $1, ($2 & 0x1F), 4);
  1795  	}
  1796  	[sreg, reg("rcx")] -> {
        		/* Variable count: the second operand is pinned to rcx and is
        		   not passed to the macro (presumably the shift reads CL) */
  1797  		x86_64_shl_reg_size(inst, $1, 4);
  1798  	}
  1799  
  1800  JIT_OP_ISHR:
  1801  	[reg, imm] -> {
        		/* Arithmetic right shift by a constant count, reduced to the
        		   range 0..31 */
  1802  		x86_64_sar_reg_imm_size(inst, $1, ($2 & 0x1F), 4);
  1803  	}
  1804  	[sreg, reg("rcx")] -> {
        		/* Variable count: the second operand is pinned to rcx and is
        		   not passed to the macro (presumably the shift reads CL) */
  1805  		x86_64_sar_reg_size(inst, $1, 4);
  1806  	}
  1807  
  1808  JIT_OP_ISHR_UN:
  1809  	[reg, imm] -> {
        		/* Logical right shift by a constant count, reduced to the
        		   range 0..31 */
  1810  		x86_64_shr_reg_imm_size(inst, $1, ($2 & 0x1F), 4);
  1811  	}
  1812  	[sreg, reg("rcx")] -> {
        		/* Variable count: the second operand is pinned to rcx and is
        		   not passed to the macro (presumably the shift reads CL) */
  1813  		x86_64_shr_reg_size(inst, $1, 4);
  1814  	}
  1815  
  1816  JIT_OP_LAND: commutative
  1817  	[reg, imms32] -> {
        		/* 8-byte and with a constant matching the imms32 pattern */
  1818  		x86_64_and_reg_imm_size(inst, $1, $2, 8);
  1819  	}
  1820  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1821  		x86_64_and_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  1822  	}
  1823  	[reg, reg] -> {
  1824  		x86_64_and_reg_reg_size(inst, $1, $2, 8);
  1825  	}
  1826  
  1827  JIT_OP_LOR: commutative
  1828  	[reg, imms32] -> {
        		/* 8-byte or with a constant matching the imms32 pattern */
  1829  		x86_64_or_reg_imm_size(inst, $1, $2, 8);
  1830  	}
  1831  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1832  		x86_64_or_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  1833  	}
  1834  	[reg, reg] -> {
  1835  		x86_64_or_reg_reg_size(inst, $1, $2, 8);
  1836  	}
  1837  
  1838  JIT_OP_LXOR: commutative
  1839  	[reg, imms32] -> {
        		/* 8-byte xor with a constant matching the imms32 pattern */
  1840  		x86_64_xor_reg_imm_size(inst, $1, $2, 8);
  1841  	}
  1842  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1843  		x86_64_xor_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  1844  	}
  1845  	[reg, reg] -> {
  1846  		x86_64_xor_reg_reg_size(inst, $1, $2, 8);
  1847  	}
  1848  
  1849  JIT_OP_LNOT:
  1850  	[reg] -> {
        		/* In-place 8-byte bitwise complement of $1 */
  1851  		x86_64_not_reg_size(inst, $1, 8);
  1852  	}
  1853  
  1854  JIT_OP_LSHL:
  1855  	[reg, imm] -> {
        		/* Constant count, reduced to the range 0..63 */
  1856  		x86_64_shl_reg_imm_size(inst, $1, ($2 & 0x3F), 8);
  1857  	}
  1858  	[sreg, reg("rcx")] -> {
        		/* Variable count: the second operand is pinned to rcx and is
        		   not passed to the macro (presumably the shift reads CL) */
  1859  		x86_64_shl_reg_size(inst, $1, 8);
  1860  	}
  1861  
  1862  JIT_OP_LSHR:
  1863  	[reg, imm] -> {
        		/* Arithmetic right shift by a constant count, reduced to the
        		   range 0..63 */
  1864  		x86_64_sar_reg_imm_size(inst, $1, ($2 & 0x3F), 8);
  1865  	}
  1866  	[sreg, reg("rcx")] -> {
        		/* Variable count: the second operand is pinned to rcx and is
        		   not passed to the macro (presumably the shift reads CL) */
  1867  		x86_64_sar_reg_size(inst, $1, 8);
  1868  	}
  1869  
  1870  JIT_OP_LSHR_UN:
  1871  	[reg, imm] -> {
        		/* Logical right shift by a constant count, reduced to the
        		   range 0..63 */
  1872  		x86_64_shr_reg_imm_size(inst, $1, ($2 & 0x3F), 8);
  1873  	}
  1874  	[sreg, reg("rcx")] -> {
        		/* Variable count: the second operand is pinned to rcx and is
        		   not passed to the macro (presumably the shift reads CL) */
  1875  		x86_64_shr_reg_size(inst, $1, 8);
  1876  	}
  1877  
  1878  /*
  1879   * Branch opcodes.
  1880   */
  1881  
  1882  JIT_OP_BR: branch
  1883  	[] -> {
        		/* Unconditional branch: no operands, jmp opcode 0xEB handed to
        		   output_branch together with the instruction */
  1884  		inst = output_branch(func, inst, 0xEB /* jmp */, insn);
  1885  	}
  1886  
  1887  JIT_OP_BR_IFALSE: branch
  1888  	[reg] -> {
        		/* test $1 against itself sets the zero flag when $1 is zero;
        		   the branch is taken on "equal" */
  1889  		x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1890  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  1891  	}
  1892  
  1893  JIT_OP_BR_ITRUE: branch
  1894  	[reg] -> {
        		/* test $1 against itself sets the zero flag when $1 is zero;
        		   the branch is taken on "not equal" */
  1895  		x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1896  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  1897  	}
  1898  
  1899  JIT_OP_BR_IEQ: branch, commutative
  1900  	[reg, immzero] -> {
        		/* Comparison with zero uses test instead of cmp */
  1901  		x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1902  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  1903  	}
  1904  	[reg, imm] -> {
  1905  		x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  1906  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  1907  	}
  1908  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1909  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1910  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  1911  	}
  1912  	[reg, reg] -> {
  1913  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  1914  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  1915  	}
  1916  
  1917  JIT_OP_BR_INE: branch, commutative
  1918  	[reg, immzero] -> {
        		/* Comparison with zero uses test instead of cmp */
  1919  		x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1920  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  1921  	}
  1922  	[reg, imm] -> {
  1923  		x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  1924  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  1925  	}
  1926  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1927  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1928  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  1929  	}
  1930  	[reg, reg] -> {
  1931  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  1932  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  1933  	}
  1934  
  1935  JIT_OP_BR_ILT: branch
  1936  	[reg, imm] -> {
        		/* Comparison with a constant zero uses test instead of cmp */
  1937  		if($2 == 0)
  1938  		{
  1939  			x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1940  		}
  1941  		else
  1942  		{
  1943  			x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  1944  		}
  1945  		inst = output_branch(func, inst, 0x7C /* lt */, insn);
  1946  	}
  1947  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1948  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1949  		inst = output_branch(func, inst, 0x7C /* lt */, insn);
  1950  	}
  1951  	[reg, reg] -> {
  1952  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  1953  		inst = output_branch(func, inst, 0x7C /* lt */, insn);
  1954  	}
  1955  
  1956  JIT_OP_BR_ILT_UN: branch
  1957  	[reg, imm] -> {
        		/* Unsigned "less than": cmp followed by branch opcode 0x72 */
  1958  		x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  1959  		inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
  1960  	}
  1961  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1962  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1963  		inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
  1964  	}
  1965  	[reg, reg] -> {
  1966  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  1967  		inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
  1968  	}
  1969  
  1970  JIT_OP_BR_ILE: branch
  1971  	[reg, imm] -> {
        		/* Comparison with a constant zero uses test instead of cmp */
  1972  		if($2 == 0)
  1973  		{
  1974  			x86_64_test_reg_reg_size(inst, $1, $1, 4);
  1975  		}
  1976  		else
  1977  		{
  1978  			x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  1979  		}
  1980  		inst = output_branch(func, inst, 0x7E /* le */, insn);
  1981  	}
  1982  	[reg, local] -> {
        		/* Second operand is read from the frame at RBP + $2 */
  1983  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1984  		inst = output_branch(func, inst, 0x7E /* le */, insn);
  1985  	}
  1986  	[reg, reg] -> {
  1987  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  1988  		inst = output_branch(func, inst, 0x7E /* le */, insn);
  1989  	}
  1990  
  1991  JIT_OP_BR_ILE_UN: branch
        	/*
        	 * Less-or-equal branch, _UN variant: jump byte 0x76 (x86 JBE, the
        	 * unsigned form).  Each pattern emits a CMP of operand 1 against
        	 * operand 2 (immediate, local addressed from X86_64_RBP, or
        	 * register) with size argument 4, then calls output_branch.
        	 */
  1992  	[reg, imm] -> {
  1993  		x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  1994  		inst = output_branch(func, inst, 0x76 /* le_un */, insn);
  1995  	}
  1996  	[reg, local] -> {
  1997  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  1998  		inst = output_branch(func, inst, 0x76 /* le_un */, insn);
  1999  	}
  2000  	[reg, reg] -> {
  2001  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  2002  		inst = output_branch(func, inst, 0x76 /* le_un */, insn);
  2003  	}
  2004  
  2005  JIT_OP_BR_IGT: branch
        	/*
        	 * Greater-than branch using jump byte 0x7F (x86 JG, the signed
        	 * form).  A non-zero immediate, a local addressed from X86_64_RBP,
        	 * or a register is compared with CMP; a zero immediate is checked
        	 * with TEST of the register against itself.  Size argument is 4.
        	 */
  2006  	[reg, imm] -> {
  2007  		if($2 != 0)
  2008  		{
  2009  			x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  2010  		}
  2011  		else
  2012  		{
  2013  			x86_64_test_reg_reg_size(inst, $1, $1, 4);
  2014  		}
  2015  		inst = output_branch(func, inst, 0x7F /* gt */, insn);
  2016  	}
  2017  	[reg, local] -> {
  2018  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  2019  		inst = output_branch(func, inst, 0x7F /* gt */, insn);
  2020  	}
  2021  	[reg, reg] -> {
  2022  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  2023  		inst = output_branch(func, inst, 0x7F /* gt */, insn);
  2024  	}
  2025  
  2026  JIT_OP_BR_IGT_UN: branch
        	/*
        	 * Greater-than branch, _UN variant: jump byte 0x77 (x86 JA, the
        	 * unsigned form).  Each pattern emits a CMP of operand 1 against
        	 * operand 2 (immediate, local addressed from X86_64_RBP, or
        	 * register) with size argument 4, then calls output_branch.
        	 */
  2027  	[reg, imm] -> {
  2028  		x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  2029  		inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
  2030  	}
  2031  	[reg, local] -> {
  2032  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  2033  		inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
  2034  	}
  2035  	[reg, reg] -> {
  2036  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  2037  		inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
  2038  	}
  2039  
  2040  JIT_OP_BR_IGE: branch
        	/*
        	 * Greater-or-equal branch using jump byte 0x7D (x86 JGE, the signed
        	 * form).  A non-zero immediate, a local addressed from X86_64_RBP,
        	 * or a register is compared with CMP; a zero immediate is checked
        	 * with TEST of the register against itself.  Size argument is 4.
        	 */
  2041  	[reg, imm] -> {
  2042  		if($2 != 0)
  2043  		{
  2044  			x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  2045  		}
  2046  		else
  2047  		{
  2048  			x86_64_test_reg_reg_size(inst, $1, $1, 4);
  2049  		}
  2050  		inst = output_branch(func, inst, 0x7D /* ge */, insn);
  2051  	}
  2052  	[reg, local] -> {
  2053  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  2054  		inst = output_branch(func, inst, 0x7D /* ge */, insn);
  2055  	}
  2056  	[reg, reg] -> {
  2057  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  2058  		inst = output_branch(func, inst, 0x7D /* ge */, insn);
  2059  	}
  2060  
  2061  JIT_OP_BR_IGE_UN: branch
        	/*
        	 * Greater-or-equal branch, _UN variant: jump byte 0x73 (x86 JAE,
        	 * the unsigned form).  Each pattern emits a CMP of operand 1
        	 * against operand 2 (immediate, local addressed from X86_64_RBP,
        	 * or register) with size argument 4, then calls output_branch.
        	 */
  2062  	[reg, imm] -> {
  2063  		x86_64_cmp_reg_imm_size(inst, $1, $2, 4);
  2064  		inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
  2065  	}
  2066  	[reg, local] -> {
  2067  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
  2068  		inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
  2069  	}
  2070  	[reg, reg] -> {
  2071  		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
  2072  		inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
  2073  	}
  2074  
  2075  JIT_OP_BR_LFALSE: branch
        	/*
        	 * Single-operand branch: TEST of the register against itself with
        	 * size argument 8, then output_branch with 0x74 (x86 JE), i.e. the
        	 * jump condition holds when the tested value is zero.
        	 */
  2076  	[reg] -> {
  2077  		x86_64_test_reg_reg_size(inst, $1, $1, 8);
  2078  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  2079  	}
  2080  
  2081  JIT_OP_BR_LTRUE: branch
        	/*
        	 * Single-operand branch: TEST of the register against itself with
        	 * size argument 8, then output_branch with 0x75 (x86 JNE), i.e. the
        	 * jump condition holds when the tested value is non-zero.
        	 */
  2082  	[reg] -> {
  2083  		x86_64_test_reg_reg_size(inst, $1, $1, 8);
  2084  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  2085  	}
  2086  
  2087  JIT_OP_BR_LEQ: branch, commutative
        	/*
        	 * Equality branch with size argument 8 and jump byte 0x74 (x86 JE).
        	 * The immzero pattern uses TEST of the register against itself;
        	 * the imms32, local (addressed from X86_64_RBP) and register
        	 * patterns use CMP.  Pattern order is significant: immzero is
        	 * listed ahead of the general immediate pattern.
        	 */
  2088  	[reg, immzero] -> {
  2089  		x86_64_test_reg_reg_size(inst, $1, $1, 8);
  2090  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  2091  	}
  2092  	[reg, imms32] -> {
  2093  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2094  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  2095  	}
  2096  	[reg, local] -> {
  2097  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2098  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  2099  	}
  2100  	[reg, reg] -> {
  2101  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2102  		inst = output_branch(func, inst, 0x74 /* eq */, insn);
  2103  	}
  2104  
  2105  JIT_OP_BR_LNE: branch, commutative
        	/*
        	 * Inequality branch with size argument 8 and jump byte 0x75 (x86
        	 * JNE).  The immzero pattern uses TEST of the register against
        	 * itself; the imms32, local (addressed from X86_64_RBP) and
        	 * register patterns use CMP.
        	 */
  2106  	[reg, immzero] -> {
  2107  		x86_64_test_reg_reg_size(inst, $1, $1, 8);
  2108  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  2109  	}
  2110  	[reg, imms32] -> {
  2111  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2112  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  2113  	}
  2114  	[reg, local] -> {
  2115  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2116  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  2117  	}
  2118  	[reg, reg] -> {
  2119  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2120  		inst = output_branch(func, inst, 0x75 /* ne */, insn);
  2121  	}
  2122  
  2123  JIT_OP_BR_LLT: branch
        	/*
        	 * Less-than branch with size argument 8 and jump byte 0x7C (x86 JL,
        	 * the signed form).  Each pattern emits a CMP of operand 1 against
        	 * operand 2 (imms32, local addressed from X86_64_RBP, or register)
        	 * and then calls output_branch.
        	 */
  2124  	[reg, imms32] -> {
  2125  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2126  		inst = output_branch(func, inst, 0x7C /* lt */, insn);
  2127  	}
  2128  	[reg, local] -> {
  2129  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2130  		inst = output_branch(func, inst, 0x7C /* lt */, insn);
  2131  	}
  2132  	[reg, reg] -> {
  2133  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2134  		inst = output_branch(func, inst, 0x7C /* lt */, insn);
  2135  	}
  2136  
  2137  JIT_OP_BR_LLT_UN: branch
        	/*
        	 * Less-than branch, _UN variant, with size argument 8 and jump byte
        	 * 0x72 (x86 JB, the unsigned form).  Each pattern emits a CMP of
        	 * operand 1 against operand 2 (imms32, local addressed from
        	 * X86_64_RBP, or register) and then calls output_branch.
        	 */
  2138  	[reg, imms32] -> {
  2139  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2140  		inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
  2141  	}
  2142  	[reg, local] -> {
  2143  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2144  		inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
  2145  	}
  2146  	[reg, reg] -> {
  2147  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2148  		inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
  2149  	}
  2150  
  2151  JIT_OP_BR_LLE: branch
        	/*
        	 * Less-or-equal branch with size argument 8 and jump byte 0x7E (x86
        	 * JLE, the signed form).  Each pattern emits a CMP of operand 1
        	 * against operand 2 (imms32, local addressed from X86_64_RBP, or
        	 * register) and then calls output_branch.
        	 */
  2152  	[reg, imms32] -> {
  2153  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2154  		inst = output_branch(func, inst, 0x7E /* le */, insn);
  2155  	}
  2156  	[reg, local] -> {
  2157  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2158  		inst = output_branch(func, inst, 0x7E /* le */, insn);
  2159  	}
  2160  	[reg, reg] -> {
  2161  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2162  		inst = output_branch(func, inst, 0x7E /* le */, insn);
  2163  	}
  2164  
  2165  JIT_OP_BR_LLE_UN: branch
        	/*
        	 * Less-or-equal branch, _UN variant, with size argument 8 and jump
        	 * byte 0x76 (x86 JBE, the unsigned form).  Each pattern emits a CMP
        	 * of operand 1 against operand 2 (imms32, local addressed from
        	 * X86_64_RBP, or register) and then calls output_branch.
        	 */
  2166  	[reg, imms32] -> {
  2167  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2168  		inst = output_branch(func, inst, 0x76 /* le_un */, insn);
  2169  	}
  2170  	[reg, local] -> {
  2171  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2172  		inst = output_branch(func, inst, 0x76 /* le_un */, insn);
  2173  	}
  2174  	[reg, reg] -> {
  2175  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2176  		inst = output_branch(func, inst, 0x76 /* le_un */, insn);
  2177  	}
  2178  
  2179  JIT_OP_BR_LGT: branch
        	/*
        	 * Greater-than branch with size argument 8 and jump byte 0x7F (x86
        	 * JG, the signed form).  Each pattern emits a CMP of operand 1
        	 * against operand 2 (imms32, local addressed from X86_64_RBP, or
        	 * register) and then calls output_branch.
        	 */
  2180  	[reg, imms32] -> {
  2181  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2182  		inst = output_branch(func, inst, 0x7F /* gt */, insn);
  2183  	}
  2184  	[reg, local] -> {
  2185  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2186  		inst = output_branch(func, inst, 0x7F /* gt */, insn);
  2187  	}
  2188  	[reg, reg] -> {
  2189  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2190  		inst = output_branch(func, inst, 0x7F /* gt */, insn);
  2191  	}
  2192  
  2193  JIT_OP_BR_LGT_UN: branch
        	/*
        	 * Greater-than branch, _UN variant, with size argument 8 and jump
        	 * byte 0x77 (x86 JA, the unsigned form).  Each pattern emits a CMP
        	 * of operand 1 against operand 2 (imms32, local addressed from
        	 * X86_64_RBP, or register) and then calls output_branch.
        	 */
  2194  	[reg, imms32] -> {
  2195  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2196  		inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
  2197  	}
  2198  	[reg, local] -> {
  2199  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2200  		inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
  2201  	}
  2202  	[reg, reg] -> {
  2203  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2204  		inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
  2205  	}
  2206  
  2207  JIT_OP_BR_LGE: branch
        	/*
        	 * Greater-or-equal branch with size argument 8 and jump byte 0x7D
        	 * (x86 JGE, the signed form).  Each pattern emits a CMP of operand
        	 * 1 against operand 2 (imms32, local addressed from X86_64_RBP, or
        	 * register) and then calls output_branch.
        	 */
  2208  	[reg, imms32] -> {
  2209  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2210  		inst = output_branch(func, inst, 0x7D /* ge */, insn);
  2211  	}
  2212  	[reg, local] -> {
  2213  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2214  		inst = output_branch(func, inst, 0x7D /* ge */, insn);
  2215  	}
  2216  	[reg, reg] -> {
  2217  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2218  		inst = output_branch(func, inst, 0x7D /* ge */, insn);
  2219  	}
  2220  
  2221  JIT_OP_BR_LGE_UN: branch
        	/*
        	 * Greater-or-equal branch, _UN variant, with size argument 8 and
        	 * jump byte 0x73 (x86 JAE, the unsigned form).  Each pattern emits
        	 * a CMP of operand 1 against operand 2 (imms32, local addressed
        	 * from X86_64_RBP, or register) and then calls output_branch.
        	 */
  2222  	[reg, imms32] -> {
  2223  		x86_64_cmp_reg_imm_size(inst, $1, $2, 8);
  2224  		inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
  2225  	}
  2226  	[reg, local] -> {
  2227  		x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8);
  2228  		inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
  2229  	}
  2230  	[reg, reg] -> {
  2231  		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
  2232  		inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
  2233  	}
  2234  
  2235  JIT_OP_BR_FEQ: branch, commutative
        	/*
        	 * Equality branch on xreg operands.  Each pattern (immediate passed
        	 * as a pointer, local addressed from X86_64_RBP, or xreg) delegates
        	 * to the matching xmm_cmp_brcc_* helper with X86_CC_Z and trailing
        	 * flags (0, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2236  	[xreg, imm] -> {
  2237  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_Z, $1, (void *)$2, 0, 0, insn);
  2238  	}
  2239  	[xreg, local] -> {
  2240  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_Z, $1, X86_64_RBP, $2, 0, 0, insn);
  2241  	}
  2242  	[xreg, xreg] -> {
  2243  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_Z, $1, $2, 0, 0, insn);
  2244  	}
  2245  
  2246  JIT_OP_BR_FNE: branch, commutative
        	/*
        	 * Inequality branch on xreg operands.  Each pattern delegates to
        	 * the matching xmm_cmp_brcc_* helper with X86_CC_NZ and trailing
        	 * flags (0, 1).  Only the xreg/xreg pattern carries a space("20")
        	 * annotation.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2247  	[xreg, imm] -> {
  2248  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NZ, $1, (void *)$2, 0, 1, insn);
  2249  	}
  2250  	[xreg, local] -> {
  2251  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NZ, $1, X86_64_RBP, $2, 0, 1, insn);
  2252  	}
  2253  	[xreg, xreg, space("20")] -> {
  2254  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NZ, $1, $2, 0, 1, insn);
  2255  	}
  2256  
  2257  JIT_OP_BR_FLT: branch
        	/*
        	 * Less-than branch on xreg operands.  Each pattern (immediate passed
        	 * as a pointer, local addressed from X86_64_RBP, or xreg) delegates
        	 * to the matching xmm_cmp_brcc_* helper with X86_CC_C and trailing
        	 * flags (0, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2258  	[xreg, imm] -> {
  2259  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_C, $1, (void *)$2, 0, 0, insn);
  2260  	}
  2261  	[xreg, local] -> {
  2262  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_C, $1, X86_64_RBP, $2, 0, 0, insn);
  2263  	}
  2264  	[xreg, xreg] -> {
  2265  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_C, $1, $2, 0, 0, insn);
  2266  	}
  2267  
  2268  JIT_OP_BR_FLT_INV: branch
        	/*
        	 * _INV variant of the FLT branch: same X86_CC_C condition and the
        	 * same three patterns as JIT_OP_BR_FLT, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2269  	[xreg, imm] -> {
  2270  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_C, $1, (void *)$2, 0, 1, insn);
  2271  	}
  2272  	[xreg, local] -> {
  2273  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_C, $1, X86_64_RBP, $2, 0, 1, insn);
  2274  	}
  2275  	[xreg, xreg] -> {
  2276  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_C, $1, $2, 0, 1, insn);
  2277  	}
  2278  
  2279  JIT_OP_BR_FLE: branch
        	/*
        	 * Less-or-equal branch on xreg operands.  Each pattern (immediate
        	 * passed as a pointer, local addressed from X86_64_RBP, or xreg)
        	 * delegates to the matching xmm_cmp_brcc_* helper with X86_CC_BE
        	 * and trailing flags (0, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2280  	[xreg, imm] -> {
  2281  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_BE, $1, (void *)$2, 0, 0, insn);
  2282  	}
  2283  	[xreg, local] -> {
  2284  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_BE, $1, X86_64_RBP, $2, 0, 0, insn);
  2285  	}
  2286  	[xreg, xreg] -> {
  2287  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_BE, $1, $2, 0, 0, insn);
  2288  	}
  2289  
  2290  JIT_OP_BR_FLE_INV: branch
        	/*
        	 * _INV variant of the FLE branch: same X86_CC_BE condition and the
        	 * same three patterns as JIT_OP_BR_FLE, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2291  	[xreg, imm] -> {
  2292  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_BE, $1, (void *)$2, 0, 1, insn);
  2293  	}
  2294  	[xreg, local] -> {
  2295  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_BE, $1, X86_64_RBP, $2, 0, 1, insn);
  2296  	}
  2297  	[xreg, xreg] -> {
  2298  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_BE, $1, $2, 0, 1, insn);
  2299  	}
  2300  
  2301  JIT_OP_BR_FGT: branch
        	/*
        	 * Greater-than branch on xreg operands.  Each pattern (immediate
        	 * passed as a pointer, local addressed from X86_64_RBP, or xreg)
        	 * delegates to the matching xmm_cmp_brcc_* helper with X86_CC_NBE
        	 * and trailing flags (0, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2302  	[xreg, imm] -> {
  2303  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NBE, $1, (void *)$2, 0, 0, insn);
  2304  	}
  2305  	[xreg, local] -> {
  2306  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NBE, $1, X86_64_RBP, $2, 0, 0, insn);
  2307  	}
  2308  	[xreg, xreg] -> {
  2309  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NBE, $1, $2, 0, 0, insn);
  2310  	}
  2311  
  2312  JIT_OP_BR_FGT_INV: branch
        	/*
        	 * _INV variant of the FGT branch: same X86_CC_NBE condition and the
        	 * same three patterns as JIT_OP_BR_FGT, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2313  	[xreg, imm] -> {
  2314  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NBE, $1, (void *)$2, 0, 1, insn);
  2315  	}
  2316  	[xreg, local] -> {
  2317  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NBE, $1, X86_64_RBP, $2, 0, 1, insn);
  2318  	}
  2319  	[xreg, xreg] -> {
  2320  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NBE, $1, $2, 0, 1, insn);
  2321  	}
  2322  
  2323  JIT_OP_BR_FGE: branch
        	/*
        	 * Greater-or-equal branch on xreg operands.  Each pattern (immediate
        	 * passed as a pointer, local addressed from X86_64_RBP, or xreg)
        	 * delegates to the matching xmm_cmp_brcc_* helper with X86_CC_NC
        	 * and trailing flags (0, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2324  	[xreg, imm] -> {
  2325  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NC, $1, (void *)$2, 0, 0, insn);
  2326  	}
  2327  	[xreg, local] -> {
  2328  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NC, $1, X86_64_RBP, $2, 0, 0, insn);
  2329  	}
  2330  	[xreg, xreg] -> {
  2331  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NC, $1, $2, 0, 0, insn);
  2332  	}
  2333  
  2334  JIT_OP_BR_FGE_INV: branch
        	/*
        	 * _INV variant of the FGE branch: same X86_CC_NC condition and the
        	 * same three patterns as JIT_OP_BR_FGE, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2335  	[xreg, imm] -> {
  2336  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NC, $1, (void *)$2, 0, 1, insn);
  2337  	}
  2338  	[xreg, local] -> {
  2339  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NC, $1, X86_64_RBP, $2, 0, 1, insn);
  2340  	}
  2341  	[xreg, xreg] -> {
  2342  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NC, $1, $2, 0, 1, insn);
  2343  	}
  2344  
  2345  JIT_OP_BR_DEQ: branch, commutative
        	/*
        	 * Equality branch on xreg operands, D variant.  Each pattern
        	 * (immediate passed as a pointer, local addressed from X86_64_RBP,
        	 * or xreg) delegates to the matching xmm_cmp_brcc_* helper with
        	 * X86_CC_Z and trailing flags (1, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2346  	[xreg, imm] -> {
  2347  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_Z, $1, (void *)$2, 1, 0, insn);
  2348  	}
  2349  	[xreg, local] -> {
  2350  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_Z, $1, X86_64_RBP, $2, 1, 0, insn);
  2351  	}
  2352  	[xreg, xreg] -> {
  2353  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_Z, $1, $2, 1, 0, insn);
  2354  	}
  2355  
  2356  JIT_OP_BR_DNE: branch, commutative
        	/*
        	 * Inequality branch on xreg operands, D variant.  Each pattern
        	 * delegates to the matching xmm_cmp_brcc_* helper with X86_CC_NZ
        	 * and trailing flags (1, 1).  Only the xreg/xreg pattern carries a
        	 * space("20") annotation.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2357  	[xreg, imm] -> {
  2358  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NZ, $1, (void *)$2, 1, 1, insn);
  2359  	}
  2360  	[xreg, local] -> {
  2361  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NZ, $1, X86_64_RBP, $2, 1, 1, insn);
  2362  	}
  2363  	[xreg, xreg, space("20")] -> {
  2364  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NZ, $1, $2, 1, 1, insn);
  2365  	}
  2366  
  2367  JIT_OP_BR_DLT: branch
        	/*
        	 * Less-than branch on xreg operands, D variant.  Each pattern
        	 * (immediate passed as a pointer, local addressed from X86_64_RBP,
        	 * or xreg) delegates to the matching xmm_cmp_brcc_* helper with
        	 * X86_CC_C and trailing flags (1, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2368  	[xreg, imm] -> {
  2369  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_C, $1, (void *)$2, 1, 0, insn);
  2370  	}
  2371  	[xreg, local] -> {
  2372  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_C, $1, X86_64_RBP, $2, 1, 0, insn);
  2373  	}
  2374  	[xreg, xreg] -> {
  2375  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_C, $1, $2, 1, 0, insn);
  2376  	}
  2377  
  2378  JIT_OP_BR_DLT_INV: branch
        	/*
        	 * _INV variant of the DLT branch: same X86_CC_C condition and the
        	 * same three patterns as JIT_OP_BR_DLT, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2379  	[xreg, imm] -> {
  2380  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_C, $1, (void *)$2, 1, 1, insn);
  2381  	}
  2382  	[xreg, local] -> {
  2383  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_C, $1, X86_64_RBP, $2, 1, 1, insn);
  2384  	}
  2385  	[xreg, xreg] -> {
  2386  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_C, $1, $2, 1, 1, insn);
  2387  	}
  2388  
  2389  JIT_OP_BR_DLE: branch
        	/*
        	 * Less-or-equal branch on xreg operands, D variant.  Each pattern
        	 * (immediate passed as a pointer, local addressed from X86_64_RBP,
        	 * or xreg) delegates to the matching xmm_cmp_brcc_* helper with
        	 * X86_CC_BE and trailing flags (1, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2390  	[xreg, imm] -> {
  2391  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_BE, $1, (void *)$2, 1, 0, insn);
  2392  	}
  2393  	[xreg, local] -> {
  2394  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_BE, $1, X86_64_RBP, $2, 1, 0, insn);
  2395  	}
  2396  	[xreg, xreg] -> {
  2397  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_BE, $1, $2, 1, 0, insn);
  2398  	}
  2399  
  2400  JIT_OP_BR_DLE_INV: branch
        	/*
        	 * _INV variant of the DLE branch: same X86_CC_BE condition and the
        	 * same three patterns as JIT_OP_BR_DLE, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2401  	[xreg, imm] -> {
  2402  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_BE, $1, (void *)$2, 1, 1, insn);
  2403  	}
  2404  	[xreg, local] -> {
  2405  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_BE, $1, X86_64_RBP, $2, 1, 1, insn);
  2406  	}
  2407  	[xreg, xreg] -> {
  2408  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_BE, $1, $2, 1, 1, insn);
  2409  	}
  2410  
  2411  JIT_OP_BR_DGT: branch
        	/*
        	 * Greater-than branch on xreg operands, D variant.  Each pattern
        	 * (immediate passed as a pointer, local addressed from X86_64_RBP,
        	 * or xreg) delegates to the matching xmm_cmp_brcc_* helper with
        	 * X86_CC_NBE and trailing flags (1, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2412  	[xreg, imm] -> {
  2413  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NBE, $1, (void *)$2, 1, 0, insn);
  2414  	}
  2415  	[xreg, local] -> {
  2416  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NBE, $1, X86_64_RBP, $2, 1, 0, insn);
  2417  	}
  2418  	[xreg, xreg] -> {
  2419  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NBE, $1, $2, 1, 0, insn);
  2420  	}
  2421  
  2422  JIT_OP_BR_DGT_INV: branch
        	/*
        	 * _INV variant of the DGT branch: same X86_CC_NBE condition and the
        	 * same three patterns as JIT_OP_BR_DGT, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2423  	[xreg, imm] -> {
  2424  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NBE, $1, (void *)$2, 1, 1, insn);
  2425  	}
  2426  	[xreg, local] -> {
  2427  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NBE, $1, X86_64_RBP, $2, 1, 1, insn);
  2428  	}
  2429  	[xreg, xreg] -> {
  2430  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NBE, $1, $2, 1, 1, insn);
  2431  	}
  2432  
  2433  JIT_OP_BR_DGE: branch
        	/*
        	 * Greater-or-equal branch on xreg operands, D variant.  Each
        	 * pattern (immediate passed as a pointer, local addressed from
        	 * X86_64_RBP, or xreg) delegates to the matching xmm_cmp_brcc_*
        	 * helper with X86_CC_NC and trailing flags (1, 0).
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2434  	[xreg, imm] -> {
  2435  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NC, $1, (void *)$2, 1, 0, insn);
  2436  	}
  2437  	[xreg, local] -> {
  2438  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NC, $1, X86_64_RBP, $2, 1, 0, insn);
  2439  	}
  2440  	[xreg, xreg] -> {
  2441  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NC, $1, $2, 1, 0, insn);
  2442  	}
  2443  
  2444  JIT_OP_BR_DGE_INV: branch
        	/*
        	 * _INV variant of the DGE branch: same X86_CC_NC condition and the
        	 * same three patterns as JIT_OP_BR_DGE, but the last flag passed to
        	 * the xmm_cmp_brcc_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2445  	[xreg, imm] -> {
  2446  		inst = xmm_cmp_brcc_reg_imm(gen, func, inst, X86_CC_NC, $1, (void *)$2, 1, 1, insn);
  2447  	}
  2448  	[xreg, local] -> {
  2449  		inst = xmm_cmp_brcc_reg_membase(func, inst, X86_CC_NC, $1, X86_64_RBP, $2, 1, 1, insn);
  2450  	}
  2451  	[xreg, xreg] -> {
  2452  		inst = xmm_cmp_brcc_reg_reg(func, inst, X86_CC_NC, $1, $2, 1, 1, insn);
  2453  	}
  2454  
  2455  /*
  2456   * Comparison opcodes.
  2457   */
  2458  
  2459  JIT_OP_IEQ: commutative
        	/*
        	 * Equality comparison into a destination register, size argument 4.
        	 * The immzero pattern uses TEST of operand 2 against itself; the
        	 * immediate, local (addressed from X86_64_RBP) and register
        	 * patterns use CMP of operand 2 against operand 3.  setcc_reg is
        	 * then called on the destination with X86_CC_EQ and last argument 0.
        	 */
  2460  	[=reg, reg, immzero] -> {
  2461  		x86_64_test_reg_reg_size(inst, $2, $2, 4);
  2462  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2463  	}
  2464  	[=reg, reg, imm] -> {
  2465  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2466  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2467  	}
  2468  	[=reg, reg, local] -> {
  2469  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2470  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2471  	}
  2472  	[=reg, reg, reg] -> {
  2473  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2474  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2475  	}
  2476  
  2477  JIT_OP_INE: commutative
        	/*
        	 * Inequality comparison into a destination register, size argument
        	 * 4.  The immzero pattern uses TEST of operand 2 against itself;
        	 * the immediate, local (addressed from X86_64_RBP) and register
        	 * patterns use CMP of operand 2 against operand 3.  setcc_reg is
        	 * then called on the destination with X86_CC_NE and last argument 0.
        	 */
  2478  	[=reg, reg, immzero] -> {
  2479  		x86_64_test_reg_reg_size(inst, $2, $2, 4);
  2480  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2481  	}
  2482  	[=reg, reg, imm] -> {
  2483  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2484  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2485  	}
  2486  	[=reg, reg, local] -> {
  2487  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2488  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2489  	}
  2490  	[=reg, reg, reg] -> {
  2491  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2492  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2493  	}
  2494  
  2495  JIT_OP_ILT:
        	/*
        	 * Less-than comparison into a destination register.  Each pattern
        	 * emits a CMP of operand 2 against operand 3 (immediate, local
        	 * addressed from X86_64_RBP, or register) with size argument 4,
        	 * then calls setcc_reg on the destination with X86_CC_LT.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_ILT_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2496  	[=reg, reg, imm] -> {
  2497  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2498  		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
  2499  	}
  2500  	[=reg, reg, local] -> {
  2501  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2502  		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
  2503  	}
  2504  	[=reg, reg, reg] -> {
  2505  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2506  		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
  2507  	}
  2508  
  2509  JIT_OP_ILT_UN:
        	/*
        	 * Less-than comparison, _UN variant, into a destination register.
        	 * Each pattern emits a CMP of operand 2 against operand 3
        	 * (immediate, local addressed from X86_64_RBP, or register) with
        	 * size argument 4, then calls setcc_reg with X86_CC_LT.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_ILT;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2510  	[=reg, reg, imm] -> {
  2511  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2512  		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
  2513  	}
  2514  	[=reg, reg, local] -> {
  2515  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2516  		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
  2517  	}
  2518  	[=reg, reg, reg] -> {
  2519  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2520  		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
  2521  	}
  2522  
  2523  JIT_OP_ILE:
        	/*
        	 * Less-or-equal comparison into a destination register.  Each
        	 * pattern emits a CMP of operand 2 against operand 3 (immediate,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 4, then calls setcc_reg on the destination with X86_CC_LE.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_ILE_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2524  	[=reg, reg, imm] -> {
  2525  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2526  		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
  2527  	}
  2528  	[=reg, reg, local] -> {
  2529  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2530  		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
  2531  	}
  2532  	[=reg, reg, reg] -> {
  2533  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2534  		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
  2535  	}
  2536  
  2537  JIT_OP_ILE_UN:
        	/*
        	 * Less-or-equal comparison, _UN variant, into a destination
        	 * register.  Each pattern emits a CMP of operand 2 against operand
        	 * 3 (immediate, local addressed from X86_64_RBP, or register) with
        	 * size argument 4, then calls setcc_reg with X86_CC_LE.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_ILE;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2538  	[=reg, reg, imm] -> {
  2539  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2540  		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
  2541  	}
  2542  	[=reg, reg, local] -> {
  2543  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2544  		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
  2545  	}
  2546  	[=reg, reg, reg] -> {
  2547  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2548  		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
  2549  	}
  2550  
  2551  JIT_OP_IGT:
        	/*
        	 * Greater-than comparison into a destination register.  Each
        	 * pattern emits a CMP of operand 2 against operand 3 (immediate,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 4, then calls setcc_reg on the destination with X86_CC_GT.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_IGT_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2552  	[=reg, reg, imm] -> {
  2553  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2554  		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
  2555  	}
  2556  	[=reg, reg, local] -> {
  2557  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2558  		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
  2559  	}
  2560  	[=reg, reg, reg] -> {
  2561  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2562  		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
  2563  	}
  2564  
  2565  JIT_OP_IGT_UN:
        	/*
        	 * Greater-than comparison, _UN variant, into a destination
        	 * register.  Each pattern emits a CMP of operand 2 against operand
        	 * 3 (immediate, local addressed from X86_64_RBP, or register) with
        	 * size argument 4, then calls setcc_reg with X86_CC_GT.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_IGT;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2566  	[=reg, reg, imm] -> {
  2567  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2568  		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
  2569  	}
  2570  	[=reg, reg, local] -> {
  2571  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2572  		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
  2573  	}
  2574  	[=reg, reg, reg] -> {
  2575  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2576  		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
  2577  	}
  2578  
  2579  JIT_OP_IGE:
        	/*
        	 * Greater-or-equal comparison into a destination register.  Each
        	 * pattern emits a CMP of operand 2 against operand 3 (immediate,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 4, then calls setcc_reg on the destination with X86_CC_GE.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_IGE_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2580  	[=reg, reg, imm] -> {
  2581  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2582  		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
  2583  	}
  2584  	[=reg, reg, local] -> {
  2585  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2586  		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
  2587  	}
  2588  	[=reg, reg, reg] -> {
  2589  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2590  		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
  2591  	}
  2592  
  2593  JIT_OP_IGE_UN:
        	/*
        	 * Greater-or-equal comparison, _UN variant, into a destination
        	 * register.  Each pattern emits a CMP of operand 2 against operand
        	 * 3 (immediate, local addressed from X86_64_RBP, or register) with
        	 * size argument 4, then calls setcc_reg with X86_CC_GE.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_IGE;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2594  	[=reg, reg, imm] -> {
  2595  		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
  2596  		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
  2597  	}
  2598  	[=reg, reg, local] -> {
  2599  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
  2600  		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
  2601  	}
  2602  	[=reg, reg, reg] -> {
  2603  		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
  2604  		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
  2605  	}
  2606  
  2607  JIT_OP_LEQ: commutative
        	/*
        	 * Equality comparison into a destination register, size argument 8.
        	 * The immzero pattern uses TEST of operand 2 against itself; the
        	 * imms32, local (addressed from X86_64_RBP) and register patterns
        	 * use CMP of operand 2 against operand 3.  setcc_reg is then
        	 * called on the destination with X86_CC_EQ and last argument 0.
        	 */
  2608  	[=reg, reg, immzero] -> {
  2609  		x86_64_test_reg_reg_size(inst, $2, $2, 8);
  2610  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2611  	}
  2612  	[=reg, reg, imms32] -> {
  2613  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2614  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2615  	}
  2616  	[=reg, reg, local] -> {
  2617  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2618  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2619  	}
  2620  	[=reg, reg, reg] -> {
  2621  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2622  		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
  2623  	}
  2624  
  2625  JIT_OP_LNE: commutative
        	/*
        	 * Inequality comparison into a destination register, size argument
        	 * 8.  The immzero pattern uses TEST of operand 2 against itself;
        	 * the imms32, local (addressed from X86_64_RBP) and register
        	 * patterns use CMP of operand 2 against operand 3.  setcc_reg is
        	 * then called on the destination with X86_CC_NE and last argument 0.
        	 */
  2626  	[=reg, reg, immzero] -> {
  2627  		x86_64_test_reg_reg_size(inst, $2, $2, 8);
  2628  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2629  	}
  2630  	[=reg, reg, imms32] -> {
  2631  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2632  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2633  	}
  2634  	[=reg, reg, local] -> {
  2635  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2636  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2637  	}
  2638  	[=reg, reg, reg] -> {
  2639  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2640  		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
  2641  	}
  2642  
  2643  JIT_OP_LLT:
        	/*
        	 * Less-than comparison into a destination register.  Each pattern
        	 * emits a CMP of operand 2 against operand 3 (imms32, local
        	 * addressed from X86_64_RBP, or register) with size argument 8,
        	 * then calls setcc_reg on the destination with X86_CC_LT.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_LLT_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2644  	[=reg, reg, imms32] -> {
  2645  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2646  		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
  2647  	}
  2648  	[=reg, reg, local] -> {
  2649  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2650  		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
  2651  	}
  2652  	[=reg, reg, reg] -> {
  2653  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2654  		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
  2655  	}
  2656  
  2657  JIT_OP_LLT_UN:
        	/*
        	 * Less-than comparison, _UN variant, into a destination register.
        	 * Each pattern emits a CMP of operand 2 against operand 3 (imms32,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 8, then calls setcc_reg with X86_CC_LT.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_LLT;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2658  	[=reg, reg, imms32] -> {
  2659  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2660  		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
  2661  	}
  2662  	[=reg, reg, local] -> {
  2663  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2664  		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
  2665  	}
  2666  	[=reg, reg, reg] -> {
  2667  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2668  		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
  2669  	}
  2670  
  2671  JIT_OP_LLE:
        	/*
        	 * Less-or-equal comparison into a destination register.  Each
        	 * pattern emits a CMP of operand 2 against operand 3 (imms32,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 8, then calls setcc_reg on the destination with X86_CC_LE.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_LLE_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2672  	[=reg, reg, imms32] -> {
  2673  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2674  		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
  2675  	}
  2676  	[=reg, reg, local] -> {
  2677  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2678  		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
  2679  	}
  2680  	[=reg, reg, reg] -> {
  2681  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2682  		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
  2683  	}
  2684  
  2685  JIT_OP_LLE_UN:
        	/*
        	 * Less-or-equal comparison, _UN variant, into a destination
        	 * register.  Each pattern emits a CMP of operand 2 against operand
        	 * 3 (imms32, local addressed from X86_64_RBP, or register) with
        	 * size argument 8, then calls setcc_reg with X86_CC_LE.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_LLE;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2686  	[=reg, reg, imms32] -> {
  2687  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2688  		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
  2689  	}
  2690  	[=reg, reg, local] -> {
  2691  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2692  		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
  2693  	}
  2694  	[=reg, reg, reg] -> {
  2695  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2696  		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
  2697  	}
  2698  
  2699  JIT_OP_LGT:
        	/*
        	 * Greater-than comparison into a destination register.  Each
        	 * pattern emits a CMP of operand 2 against operand 3 (imms32,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 8, then calls setcc_reg on the destination with X86_CC_GT.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_LGT_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2700  	[=reg, reg, imms32] -> {
  2701  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2702  		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
  2703  	}
  2704  	[=reg, reg, local] -> {
  2705  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2706  		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
  2707  	}
  2708  	[=reg, reg, reg] -> {
  2709  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2710  		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
  2711  	}
  2712  
  2713  JIT_OP_LGT_UN:
        	/*
        	 * Greater-than comparison, _UN variant, into a destination
        	 * register.  Each pattern emits a CMP of operand 2 against operand
        	 * 3 (imms32, local addressed from X86_64_RBP, or register) with
        	 * size argument 8, then calls setcc_reg with X86_CC_GT.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_LGT;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2714  	[=reg, reg, imms32] -> {
  2715  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2716  		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
  2717  	}
  2718  	[=reg, reg, local] -> {
  2719  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2720  		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
  2721  	}
  2722  	[=reg, reg, reg] -> {
  2723  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2724  		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
  2725  	}
  2726  
  2727  JIT_OP_LGE:
        	/*
        	 * Greater-or-equal comparison into a destination register.  Each
        	 * pattern emits a CMP of operand 2 against operand 3 (imms32,
        	 * local addressed from X86_64_RBP, or register) with size argument
        	 * 8, then calls setcc_reg on the destination with X86_CC_GE.
        	 * The last setcc_reg argument is 1 here and 0 in JIT_OP_LGE_UN;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2728  	[=reg, reg, imms32] -> {
  2729  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2730  		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
  2731  	}
  2732  	[=reg, reg, local] -> {
  2733  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2734  		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
  2735  	}
  2736  	[=reg, reg, reg] -> {
  2737  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2738  		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
  2739  	}
  2740  
  2741  JIT_OP_LGE_UN:
        	/*
        	 * Greater-or-equal comparison, _UN variant, into a destination
        	 * register.  Each pattern emits a CMP of operand 2 against operand
        	 * 3 (imms32, local addressed from X86_64_RBP, or register) with
        	 * size argument 8, then calls setcc_reg with X86_CC_GE.
        	 * The last setcc_reg argument is 0 here and 1 in JIT_OP_LGE;
        	 * presumably an is-signed flag - confirm in setcc_reg.
        	 */
  2742  	[=reg, reg, imms32] -> {
  2743  		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
  2744  		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
  2745  	}
  2746  	[=reg, reg, local] -> {
  2747  		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
  2748  		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
  2749  	}
  2750  	[=reg, reg, reg] -> {
  2751  		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
  2752  		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
  2753  	}
  2754  
  2755  JIT_OP_FEQ: commutative
        	/*
        	 * Equality comparison of xreg operands into a general register.
        	 * Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_Z and
        	 * trailing flags (0, 0); the immediate is passed as a pointer.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2756  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2757  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_Z, $2, (void *)$3, $4, 0, 0);
  2758  	}
  2759  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2760  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_Z, $2, $3, $4, 0, 0);
  2761  	}
  2762  
  2763  JIT_OP_FNE: commutative
        	/*
        	 * Inequality comparison of xreg operands into a general register.
        	 * Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_NZ and
        	 * trailing flags (0, 1); the immediate is passed as a pointer.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2764  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2765  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NZ, $2, (void *)$3, $4, 0, 1);
  2766  	}
  2767  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2768  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NZ, $2, $3, $4, 0, 1);
  2769  	}
  2770  
  2771  JIT_OP_FLT:
        	/*
        	 * Less-than comparison of xreg operands into a general register.
        	 * Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_C and
        	 * trailing flags (0, 0); the immediate is passed as a pointer.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2772  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2773  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_C, $2, (void *)$3, $4, 0, 0);
  2774  	}
  2775  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2776  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_C, $2, $3, $4, 0, 0);
  2777  	}
  2778  
  2779  JIT_OP_FLT_INV:
        	/*
        	 * _INV variant of JIT_OP_FLT: same X86_CC_C condition and the same
        	 * two patterns, but the last flag passed to the xmm_cmp_setcc_reg_*
        	 * helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2780  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2781  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_C, $2, (void *)$3, $4, 0, 1);
  2782  	}
  2783  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2784  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_C, $2, $3, $4, 0, 1);
  2785  	}
  2786  
  2787  JIT_OP_FLE:
        	/*
        	 * Less-or-equal comparison of xreg operands into a general register.
        	 * Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_BE and
        	 * trailing flags (0, 0); the immediate is passed as a pointer.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2788  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2789  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_BE, $2, (void *)$3, $4, 0, 0);
  2790  	}
  2791  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2792  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_BE, $2, $3, $4, 0, 0);
  2793  	}
  2794  
  2795  JIT_OP_FLE_INV:
        	/*
        	 * _INV variant of JIT_OP_FLE: same X86_CC_BE condition and the same
        	 * two patterns, but the last flag passed to the xmm_cmp_setcc_reg_*
        	 * helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2796  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2797  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_BE, $2, (void *)$3, $4, 0, 1);
  2798  	}
  2799  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2800  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_BE, $2, $3, $4, 0, 1);
  2801  	}
  2802  
  2803  JIT_OP_FGT:
        	/*
        	 * Greater-than comparison of xreg operands into a general register.
        	 * Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_NBE and
        	 * trailing flags (0, 0); the immediate is passed as a pointer.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2804  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2805  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NBE, $2, (void *)$3, $4, 0, 0);
  2806  	}
  2807  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2808  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NBE, $2, $3, $4, 0, 0);
  2809  	}
  2810  
  2811  JIT_OP_FGT_INV:
        	/*
        	 * _INV variant of JIT_OP_FGT: same X86_CC_NBE condition and the
        	 * same two patterns, but the last flag passed to the
        	 * xmm_cmp_setcc_reg_* helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2812  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2813  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NBE, $2, (void *)$3, $4, 0, 1);
  2814  	}
  2815  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2816  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NBE, $2, $3, $4, 0, 1);
  2817  	}
  2818  
  2819  JIT_OP_FGE:
        	/*
        	 * Greater-or-equal comparison of xreg operands into a general
        	 * register.  Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_NC and
        	 * trailing flags (0, 0); the immediate is passed as a pointer.
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2820  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2821  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NC, $2, (void *)$3, $4, 0, 0);
  2822  	}
  2823  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2824  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NC, $2, $3, $4, 0, 0);
  2825  	}
  2826  
  2827  JIT_OP_FGE_INV:
        	/*
        	 * _INV variant of JIT_OP_FGE: same X86_CC_NC condition and the same
        	 * two patterns, but the last flag passed to the xmm_cmp_setcc_reg_*
        	 * helpers is 1 instead of 0.
        	 * NOTE(review): that flag presumably selects the NaN handling;
        	 * confirm against the helper definitions.
        	 */
  2828  	[=+reg, xreg, imm, scratch reg, space("23")] -> {
  2829  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NC, $2, (void *)$3, $4, 0, 1);
  2830  	}
  2831  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2832  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NC, $2, $3, $4, 0, 1);
  2833  	}
  2834  
  2835  JIT_OP_DEQ: commutative
        	/*
        	 * Equality comparison of xreg operands into a general register, D
        	 * variant.  Both patterns request a scratch reg and delegate to
        	 * xmm_cmp_setcc_reg_imm / xmm_cmp_setcc_reg_reg with X86_CC_Z and
        	 * trailing flags (1, 0).  The immediate pattern declares
        	 * space("24"), where the F variant declares space("23").
        	 * NOTE(review): the flags presumably mean (is-double, NaN handling);
        	 * confirm against the helper definitions.
        	 */
  2836  	[=+reg, xreg, imm, scratch reg, space("24")] -> {
  2837  		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_Z, $2, (void *)$3, $4, 1, 0);
  2838  	}
  2839  	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
  2840  		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_Z, $2, $3, $4, 1, 0);
  2841  	}
  2842  
/*
 * JIT_OP_DNE: store the outcome of a "D" (presumably float64)
 * inequality comparison in an integer register, using condition
 * X86_CC_NZ.  Declared commutative.
 *
 * NOTE(review): trailing arguments (1, 1) presumably mean double
 * precision / NaN yields 1 -- confirm against the helper definitions.
 */
JIT_OP_DNE: commutative
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NZ, $2, (void *)$3, $4, 1, 1);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NZ, $2, $3, $4, 1, 1);
	}
  2850  
/*
 * JIT_OP_DLT: store the outcome of a "D" (presumably float64) less-than
 * comparison in an integer register, using condition X86_CC_C.
 *
 * Pattern 1: second operand is an immediate passed by pointer.
 * Pattern 2: both operands are xmm registers.
 *
 * NOTE(review): trailing arguments (1, 0) presumably mean double
 * precision / NaN yields 0 -- confirm against the helper definitions.
 */
JIT_OP_DLT:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_C, $2, (void *)$3, $4, 1, 0);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_C, $2, $3, $4, 1, 0);
	}
  2858  
/*
 * JIT_OP_DLT_INV: same code shape as JIT_OP_DLT (condition X86_CC_C),
 * but the last helper argument is 1 instead of 0.
 *
 * NOTE(review): that last argument presumably is the value produced
 * when an operand is NaN -- confirm against xmm_cmp_setcc_*.
 */
JIT_OP_DLT_INV:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_C, $2, (void *)$3, $4, 1, 1);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_C, $2, $3, $4, 1, 1);
	}
  2866  
/*
 * JIT_OP_DLE: store the outcome of a "D" (presumably float64)
 * less-or-equal comparison in an integer register, using condition
 * X86_CC_BE.
 *
 * Pattern 1: second operand is an immediate passed by pointer.
 * Pattern 2: both operands are xmm registers.
 *
 * NOTE(review): trailing arguments (1, 0) presumably mean double
 * precision / NaN yields 0 -- confirm against the helper definitions.
 */
JIT_OP_DLE:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_BE, $2, (void *)$3, $4, 1, 0);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_BE, $2, $3, $4, 1, 0);
	}
  2874  
/*
 * JIT_OP_DLE_INV: same code shape as JIT_OP_DLE (condition X86_CC_BE),
 * but the last helper argument is 1 instead of 0.
 *
 * NOTE(review): that last argument presumably is the value produced
 * when an operand is NaN -- confirm against xmm_cmp_setcc_*.
 */
JIT_OP_DLE_INV:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_BE, $2, (void *)$3, $4, 1, 1);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_BE, $2, $3, $4, 1, 1);
	}
  2882  
/*
 * JIT_OP_DGT: store the outcome of a "D" (presumably float64)
 * greater-than comparison in an integer register, using condition
 * X86_CC_NBE.
 *
 * Pattern 1: second operand is an immediate passed by pointer.
 * Pattern 2: both operands are xmm registers.
 *
 * NOTE(review): trailing arguments (1, 0) presumably mean double
 * precision / NaN yields 0 -- confirm against the helper definitions.
 */
JIT_OP_DGT:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NBE, $2, (void *)$3, $4, 1, 0);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NBE, $2, $3, $4, 1, 0);
	}
  2890  
/*
 * JIT_OP_DGT_INV: same code shape as JIT_OP_DGT (condition X86_CC_NBE),
 * but the last helper argument is 1 instead of 0.
 *
 * NOTE(review): that last argument presumably is the value produced
 * when an operand is NaN -- confirm against xmm_cmp_setcc_*.
 */
JIT_OP_DGT_INV:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NBE, $2, (void *)$3, $4, 1, 1);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NBE, $2, $3, $4, 1, 1);
	}
  2898  
/*
 * JIT_OP_DGE: store the outcome of a "D" (presumably float64)
 * greater-or-equal comparison in an integer register, using condition
 * X86_CC_NC.
 *
 * Pattern 1: second operand is an immediate passed by pointer.
 * Pattern 2: both operands are xmm registers.
 *
 * NOTE(review): trailing arguments (1, 0) presumably mean double
 * precision / NaN yields 0 -- confirm against the helper definitions.
 */
JIT_OP_DGE:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NC, $2, (void *)$3, $4, 1, 0);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NC, $2, $3, $4, 1, 0);
	}
  2906  
/*
 * JIT_OP_DGE_INV: same code shape as JIT_OP_DGE (condition X86_CC_NC),
 * but the last helper argument is 1 instead of 0.
 *
 * NOTE(review): that last argument presumably is the value produced
 * when an operand is NaN -- confirm against xmm_cmp_setcc_*.
 */
JIT_OP_DGE_INV:
	[=+reg, xreg, imm, scratch reg, space("24")] -> {
		inst = xmm_cmp_setcc_reg_imm(gen, inst, $1, X86_CC_NC, $2, (void *)$3, $4, 1, 1);
	}
	[=+reg, xreg, xreg, scratch reg, space("20")] -> {
		inst = xmm_cmp_setcc_reg_reg(inst, $1, X86_CC_NC, $2, $3, $4, 1, 1);
	}
  2914  
/*
 * JIT_OP_FSQRT: square root via the SQRTSS emitters, result in xmm
 * register $1.
 *
 * Pattern 1 reads the operand from a frame slot (RBP + $2);
 * pattern 2 reads it from xmm register $2.
 */
JIT_OP_FSQRT:
	[=xreg, local] -> {
		x86_64_sqrtss_reg_membase(inst, $1, X86_64_RBP, $2);
	}
	[=xreg, xreg] -> {
		x86_64_sqrtss_reg_reg(inst, $1, $2);
	}
  2922  
/*
 * JIT_OP_DSQRT: square root via the SQRTSD emitters, result in xmm
 * register $1.
 *
 * Pattern 1 reads the operand from a frame slot (RBP + $2);
 * pattern 2 reads it from xmm register $2.
 */
JIT_OP_DSQRT:
	[=xreg, local] -> {
		x86_64_sqrtsd_reg_membase(inst, $1, X86_64_RBP, $2);
	}
	[=xreg, xreg] -> {
		x86_64_sqrtsd_reg_reg(inst, $1, $2);
	}
  2930  
  2931  /*
  2932   * Absolute, minimum, maximum, and sign.
  2933   */
/*
 * JIT_OP_IMAX: 32-bit maximum, computed in place in $1.
 * Compares $1 with $2, then conditionally moves $2 into $1 when the
 * compare reports "less than" (is_signed argument = 1).
 */
JIT_OP_IMAX: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
		x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 1, 4);
	}
  2939  
/*
 * JIT_OP_IMAX_UN: 32-bit maximum, computed in place in $1.
 * Same sequence as JIT_OP_IMAX except that the is_signed argument of
 * the conditional move is 0.
 * NOTE(review): presumably this makes X86_CC_LT map to the unsigned
 * "below" condition -- confirm against x86_64_cmov_reg_reg_size.
 */
JIT_OP_IMAX_UN: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
		x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 0, 4);
	}
  2945  
/*
 * JIT_OP_IMIN: 32-bit minimum, computed in place in $1.
 * Compares $1 with $2, then conditionally moves $2 into $1 when the
 * compare reports "greater than" (is_signed argument = 1).
 */
JIT_OP_IMIN: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
		x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 1, 4);
	}
  2951  
/*
 * JIT_OP_IMIN_UN: 32-bit minimum, computed in place in $1.
 * Same sequence as JIT_OP_IMIN except that the is_signed argument of
 * the conditional move is 0.
 * NOTE(review): presumably this makes X86_CC_GT map to the unsigned
 * "above" condition -- confirm against x86_64_cmov_reg_reg_size.
 */
JIT_OP_IMIN_UN: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
		x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 0, 4);
	}
  2957  
  2958  JIT_OP_ISIGN:
  2959  	[=reg, imm] -> {
  2960  		if($2 < 0)
  2961  		{
  2962  			x86_64_mov_reg_imm_size(inst, $1, -1, 4);
  2963  		}
  2964  		else if($2 > 0)
  2965  		{
  2966  			x86_64_mov_reg_imm_size(inst, $1, 1, 4);
  2967  		}
  2968  		else
  2969  		{
  2970  			x86_64_clear_reg(inst, $1);
  2971  		}
  2972  	}
  2973  	[=+reg, +reg] -> {
  2974  		x86_64_clear_reg(inst, $1);
  2975  		x86_64_test_reg_reg_size(inst, $2, $2, 4);
  2976  		x86_64_set_reg(inst, X86_CC_NZ, $1, 0);
  2977  		x86_64_sar_reg_imm_size(inst, $2, 31, 4);
  2978  		x86_64_or_reg_reg_size(inst, $1, $2, 4);
  2979  	}
  2980  
/*
 * JIT_OP_LMAX: 64-bit maximum, computed in place in $1.
 * Compares $1 with $2, then conditionally moves $2 into $1 when the
 * compare reports "less than" (is_signed argument = 1).
 */
JIT_OP_LMAX: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
		x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 1, 8);
	}
  2986  
/*
 * JIT_OP_LMAX_UN: 64-bit maximum, computed in place in $1.
 * Same sequence as JIT_OP_LMAX except that the is_signed argument of
 * the conditional move is 0.
 * NOTE(review): presumably this selects the unsigned interpretation of
 * X86_CC_LT -- confirm against x86_64_cmov_reg_reg_size.
 */
JIT_OP_LMAX_UN: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
		x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 0, 8);
	}
  2992  
/*
 * JIT_OP_LMIN: 64-bit minimum, computed in place in $1.
 * Compares $1 with $2, then conditionally moves $2 into $1 when the
 * compare reports "greater than" (is_signed argument = 1).
 */
JIT_OP_LMIN: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
		x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 1, 8);
	}
  2998  
/*
 * JIT_OP_LMIN_UN: 64-bit minimum, computed in place in $1.
 * Same sequence as JIT_OP_LMIN except that the is_signed argument of
 * the conditional move is 0.
 * NOTE(review): presumably this selects the unsigned interpretation of
 * X86_CC_GT -- confirm against x86_64_cmov_reg_reg_size.
 */
JIT_OP_LMIN_UN: commutative
	[reg, reg] -> {
		x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
		x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 0, 8);
	}
  3004  
  3005  JIT_OP_LSIGN:
  3006  	[=reg, imm] -> {
  3007  		if($2 < 0)
  3008  		{
  3009  			x86_64_mov_reg_imm_size(inst, $1, -1, 4);
  3010  		}
  3011  		else if($2 > 0)
  3012  		{
  3013  			x86_64_mov_reg_imm_size(inst, $1, 1, 4);
  3014  		}
  3015  		else
  3016  		{
  3017  			x86_64_clear_reg(inst, $1);
  3018  		}
  3019  	}
  3020  	[=+reg, +reg] -> {
  3021  		x86_64_clear_reg(inst, $1);
  3022  		x86_64_test_reg_reg_size(inst, $2, $2, 8);
  3023  		x86_64_set_reg(inst, X86_CC_NZ, $1, 0);
  3024  		x86_64_sar_reg_imm_size(inst, $2, 63, 8);
  3025  		x86_64_or_reg_reg_size(inst, $1, $2, 4);
  3026  	}
  3027  
/*
 * JIT_OP_FMAX: maximum via the MAXSS emitters, computed in place in
 * xmm register $1.  The second operand comes either from a frame slot
 * (RBP + $2) or from xmm register $2.
 */
JIT_OP_FMAX: commutative
	[xreg, local] -> {
		x86_64_maxss_reg_membase(inst, $1, X86_64_RBP, $2);
	}
	[xreg, xreg] -> {
		x86_64_maxss_reg_reg(inst, $1, $2);
	}
  3035  
/*
 * JIT_OP_FMIN: minimum via the MINSS emitters, computed in place in
 * xmm register $1.  The second operand comes either from a frame slot
 * (RBP + $2) or from xmm register $2.
 */
JIT_OP_FMIN: commutative
	[xreg, local] -> {
		x86_64_minss_reg_membase(inst, $1, X86_64_RBP, $2);
	}
	[xreg, xreg] -> {
		x86_64_minss_reg_reg(inst, $1, $2);
	}
  3043  
/*
 * JIT_OP_DMAX: maximum via the MAXSD emitters, computed in place in
 * xmm register $1.  The second operand comes either from a frame slot
 * (RBP + $2) or from xmm register $2.
 */
JIT_OP_DMAX: commutative
	[xreg, local] -> {
		x86_64_maxsd_reg_membase(inst, $1, X86_64_RBP, $2);
	}
	[xreg, xreg] -> {
		x86_64_maxsd_reg_reg(inst, $1, $2);
	}
  3051  
/*
 * JIT_OP_DMIN: minimum via the MINSD emitters, computed in place in
 * xmm register $1.  The second operand comes either from a frame slot
 * (RBP + $2) or from xmm register $2.
 */
JIT_OP_DMIN: commutative
	[xreg, local] -> {
		x86_64_minsd_reg_membase(inst, $1, X86_64_RBP, $2);
	}
	[xreg, xreg] -> {
		x86_64_minsd_reg_reg(inst, $1, $2);
	}
  3059  
  3060  /*
  3061   * Rounding
  3062   */
/*
 * JIT_OP_FFLOOR: round with mode X86_ROUND_DOWN, result in xmm
 * register $1.  The operand is a frame slot (pattern 1) or an xmm
 * register (pattern 2); a scratch general-purpose register ($3) is
 * handed to the x86_64_rounds_* helper in both cases.
 * NOTE(review): the "rounds" helpers presumably handle single
 * precision -- they are defined outside this section; confirm.
 */
JIT_OP_FFLOOR: more_space
	[=xreg, local, scratch reg] -> {
		inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_DOWN);
	}
	[=xreg, xreg, scratch reg] -> {
		inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_DOWN);
	}
  3070  
/*
 * JIT_OP_DFLOOR: round with mode X86_ROUND_DOWN, result in xmm
 * register $1.  The operand is a frame slot (pattern 1) or an xmm
 * register (pattern 2); a scratch general-purpose register ($3) is
 * handed to the x86_64_roundd_* helper in both cases.
 * NOTE(review): the "roundd" helpers presumably handle double
 * precision -- they are defined outside this section; confirm.
 */
JIT_OP_DFLOOR: more_space
	[=xreg, local, scratch reg] -> {
		inst = x86_64_roundd_reg_membase(inst, $1, $2, $3, X86_ROUND_DOWN);
	}
	[=xreg, xreg, scratch reg] -> {
		inst = x86_64_roundd_reg_reg(inst, $1, $2, $3, X86_ROUND_DOWN);
	}
  3078  
/*
 * JIT_OP_NFFLOOR: round the freg-class operand with mode
 * X86_ROUND_DOWN.  Only the scratch general-purpose register ($2) is
 * passed to x86_64_roundnf(); the freg operand itself is implicit.
 * NOTE(review): presumably the helper operates on the x87 stack top --
 * confirm against x86_64_roundnf.
 */
JIT_OP_NFFLOOR: more_space
	[freg, scratch reg] -> {
		inst = x86_64_roundnf(inst, $2, X86_ROUND_DOWN);
	}
  3083  
/*
 * JIT_OP_FCEIL: round with mode X86_ROUND_UP, result in xmm register
 * $1.  The operand is a frame slot (pattern 1) or an xmm register
 * (pattern 2); a scratch general-purpose register ($3) is handed to
 * the x86_64_rounds_* helper in both cases.
 */
JIT_OP_FCEIL: more_space
	[=xreg, local, scratch reg] -> {
		inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_UP);
	}
	[=xreg, xreg, scratch reg] -> {
		inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_UP);
	}
  3091  
/*
 * JIT_OP_DCEIL: round with mode X86_ROUND_UP, result in xmm register
 * $1.  The operand is a frame slot (pattern 1) or an xmm register
 * (pattern 2); a scratch general-purpose register ($3) is handed to
 * the x86_64_roundd_* helper in both cases.
 */
JIT_OP_DCEIL: more_space
	[=xreg, local, scratch reg] -> {
		inst = x86_64_roundd_reg_membase(inst, $1, $2, $3, X86_ROUND_UP);
	}
	[=xreg, xreg, scratch reg] -> {
		inst = x86_64_roundd_reg_reg(inst, $1, $2, $3, X86_ROUND_UP);
	}
  3099  
/*
 * JIT_OP_NFCEIL: round the freg-class operand with mode X86_ROUND_UP.
 * Only the scratch general-purpose register ($2) is passed to
 * x86_64_roundnf(); the freg operand itself is implicit.
 * NOTE(review): presumably the helper operates on the x87 stack top --
 * confirm against x86_64_roundnf.
 */
JIT_OP_NFCEIL: more_space
	[freg, scratch reg] -> {
		inst = x86_64_roundnf(inst, $2, X86_ROUND_UP);
	}
  3104  
  3105  /*
  3106  JIT_OP_FRINT: more_space
  3107  	[=xreg, local, scratch reg] -> {
  3108  		inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_ZERO);
  3109  	}
  3110  	[=xreg, xreg, scratch reg] -> {
  3111  		inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_ZERO);
  3112  	}
  3113  */
  3114  
  3115  /*
  3116   * Pointer check opcodes.
  3117   */
  3118  
/*
 * JIT_OP_CHECK_NULL: throw JIT_RESULT_NULL_REFERENCE when the pointer
 * in $1 is NULL.
 *
 * The signal-based variant is disabled by the "#if 0", so the explicit
 * test-and-branch sequence in the #else arm is always the one compiled.
 */
JIT_OP_CHECK_NULL: note
	[reg] -> {
#if 0 && defined(JIT_USE_SIGNALS)
		/* if $1 contains NULL this generates SEGV and the signal
		   handler will throw the exception  */
		x86_64_cmp_reg_membase_size(inst, $1, $1, 0, 8);
#else
		unsigned char *patch;
		x86_64_test_reg_reg_size(inst, $1, $1, 8);
		/* Remember the branch location so its target can be patched */
		patch = inst;
		/* Non-NULL pointer: skip over the throw sequence */
		x86_branch8(inst, X86_CC_NE, 0, 0);
		inst = throw_builtin(inst, func, JIT_RESULT_NULL_REFERENCE);
		/* Resolve the short branch to land right after the throw code */
		x86_patch(patch, inst);
#endif
	}
  3134  
  3135  /*
  3136   * Function calls.
  3137   */
  3138  
/*
 * JIT_OP_CALL: call another JIT function.  The callee is taken from
 * insn->dest and called through the address returned by
 * jit_function_to_closure().
 */
JIT_OP_CALL:
	[] -> {
		/* Local "func" is the callee, not the function being compiled */
		jit_function_t func = (jit_function_t)(insn->dest);
		inst = x86_64_call_code(inst, (jit_nint)jit_function_to_closure(func));
	}
  3144  
/*
 * JIT_OP_CALL_TAIL: tail-call another JIT function.  The current frame
 * is torn down (RSP = RBP; pop RBP) before jumping to the callee's
 * closure address, so the callee returns directly to our caller.
 */
JIT_OP_CALL_TAIL:
	[] -> {
		/* Local "func" is the callee, not the function being compiled */
		jit_function_t func = (jit_function_t)(insn->dest);
		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
		x86_64_jump_to_code(inst, (jit_nint)jit_function_to_closure(func));
	}
  3152  
/*
 * JIT_OP_CALL_INDIRECT: call through the address held in
 * X86_64_SCRATCH.
 *
 * NOTE(review): loading 8 into RAX presumably satisfies the SysV AMD64
 * convention that AL bounds the number of vector registers used when
 * the callee may be variadic -- confirm.  The code that places the
 * target address in X86_64_SCRATCH is not visible in this section.
 */
JIT_OP_CALL_INDIRECT:
	[] -> {
		x86_64_mov_reg_imm_size(inst, X86_64_RAX, 8, 4);
		x86_64_call_reg(inst, X86_64_SCRATCH);
	}
  3158  
/*
 * JIT_OP_CALL_INDIRECT_TAIL: tail-call through the address held in
 * X86_64_SCRATCH.  The current frame is torn down (RSP = RBP; pop RBP)
 * before the jump.
 */
JIT_OP_CALL_INDIRECT_TAIL:
	[] -> {
		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
		x86_64_jmp_reg(inst, X86_64_SCRATCH);
	}
  3165  
/*
 * JIT_OP_CALL_VTABLE_PTR: call through the address held in
 * X86_64_SCRATCH; emits the same sequence as JIT_OP_CALL_INDIRECT.
 *
 * NOTE(review): loading 8 into RAX presumably satisfies the SysV AMD64
 * variadic-call convention for AL -- confirm.
 */
JIT_OP_CALL_VTABLE_PTR:
	[] -> {
		x86_64_mov_reg_imm_size(inst, X86_64_RAX, 8, 4);
		x86_64_call_reg(inst, X86_64_SCRATCH);
	}
  3171  
/*
 * JIT_OP_CALL_VTABLE_PTR_TAIL: tail-call through the address held in
 * X86_64_SCRATCH; emits the same sequence as
 * JIT_OP_CALL_INDIRECT_TAIL (frame teardown followed by a jump).
 */
JIT_OP_CALL_VTABLE_PTR_TAIL:
	[] -> {
		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
		x86_64_jmp_reg(inst, X86_64_SCRATCH);
	}
  3178  
/*
 * JIT_OP_CALL_EXTERNAL: call the code address stored directly in
 * insn->dest.
 */
JIT_OP_CALL_EXTERNAL:
	[] -> {
		inst = x86_64_call_code(inst, (jit_nint)(insn->dest));
	}
  3183  
/*
 * JIT_OP_CALL_EXTERNAL_TAIL: tail-call the code address stored in
 * insn->dest.  The current frame is torn down (RSP = RBP; pop RBP)
 * before the jump.
 */
JIT_OP_CALL_EXTERNAL_TAIL:
	[] -> {
		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
		x86_64_jump_to_code(inst, (jit_nint)(insn->dest));
	}
  3190  
  3191  
  3192  /*
  3193   * Exception handling.
  3194   */
  3195  
/*
 * JIT_OP_THROW: throw the exception object held in $1 by calling
 * jit_exception_throw() with it in RDI.
 *
 * When the function has a "setjmp" block, the address of the throw
 * site is first stored into the catch-pc slot of that block's frame
 * area so the handler can tell where the exception originated.
 */
JIT_OP_THROW: branch
	[reg] -> {
		/* Exception object goes into the argument register RDI */
		x86_64_mov_reg_reg_size(inst, X86_64_RDI, $1, 8);
		if(func->builder->setjmp_value != 0)
		{
			jit_nint pc_offset;

			/* We have a "setjmp" block in the current function,
			   so we must record the location of the throw first */
			_jit_gen_fix_value(func->builder->setjmp_value);
			pc_offset = func->builder->setjmp_value->frame_offset +
							jit_jmp_catch_pc_offset;

			/* lea scratch, [rip + 0] yields the current code address */
			x86_64_lea_membase_size(inst, X86_64_SCRATCH, X86_64_RIP, 0, 8);
			x86_64_mov_membase_reg_size(inst, X86_64_RBP, pc_offset,
										X86_64_SCRATCH, 8);
		}
		inst = x86_64_call_code(inst, (jit_nint)jit_exception_throw);
	}
  3215  
/* JIT_OP_RETHROW: intentionally emits no code. */
JIT_OP_RETHROW: manual
	[] -> { /* Not used in native code back ends */ }
  3218  
/*
 * JIT_OP_LOAD_PC: load the current code address into $1 using a
 * RIP-relative LEA with a zero displacement.
 */
JIT_OP_LOAD_PC:
	[=reg] -> {
		x86_64_lea_membase_size(inst, $1, X86_64_RIP, 0, 8);
	}
  3223  
/* JIT_OP_LOAD_EXCEPTION_PC: intentionally emits no code. */
JIT_OP_LOAD_EXCEPTION_PC: manual
	[] -> { /* Not used in native code back ends */ }
  3226  
/*
 * JIT_OP_ENTER_FINALLY: entry of a "finally" handler.  Lowers RSP by
 * 8 bytes below the return address pushed by the call that entered
 * the handler; JIT_OP_LEAVE_FINALLY adds the 8 bytes back.
 * NOTE(review): the extra 8 bytes presumably keep the stack 16-byte
 * aligned inside the handler -- confirm.
 */
JIT_OP_ENTER_FINALLY:
	[] -> {
		/* The return address is on the stack */
		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
	 }
  3232  
/*
 * JIT_OP_LEAVE_FINALLY: exit of a "finally" handler.  Undoes the
 * 8-byte RSP adjustment made by JIT_OP_ENTER_FINALLY and returns to
 * the address left on the stack by the call that entered the handler.
 */
JIT_OP_LEAVE_FINALLY: branch
	[] -> {
		/* The "finally" return address is on the stack */
		x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
		x86_64_ret(inst);
	}
  3239  
  3240  JIT_OP_CALL_FINALLY: branch
  3241  	[] -> {
  3242  		jit_block_t block;
  3243  
  3244  		block = jit_block_from_label(func, (jit_label_t)(insn->dest));
  3245  		if(!block)
  3246  		{
  3247  			return;
  3248  		}
  3249  
  3250  		if(block->address)
  3251  		{
  3252  			inst = x86_64_call_code(inst, (jit_nint)block->address);
  3253  		}
  3254  		else
  3255  		{
  3256  			jit_int fixup;
  3257  
  3258  			if(block->fixup_list)
  3259  			{
  3260  				fixup = _JIT_CALC_FIXUP(block->fixup_list, inst + 1);
  3261  			}
  3262  			else
  3263  			{
  3264  				fixup = 0;
  3265  			}
  3266  			block->fixup_list = (void *)(inst + 1);
  3267  			x86_64_call_imm(inst, fixup);
  3268  		}
  3269  	}
  3270  
  3271  JIT_OP_ADDRESS_OF_LABEL:
  3272  	[=reg] -> {
  3273  		jit_int *fixup;
  3274  
  3275  		block = jit_block_from_label(func, (jit_label_t)(insn->value1));
  3276  		if(block->address)
  3277  		{
  3278  			/* The label is in the current function so we assume that the */
  3279  			/* displacement to the current instruction is in the +-2GB range */
  3280  
  3281  			x86_64_lea_membase_size(inst, $1, X86_64_RIP, 0, 8);
  3282  			fixup = (jit_int *)(inst - 4);
  3283  			fixup[0] = (jit_int)((jit_nint)block->address - (jit_nint)inst);
  3284  		}
  3285  		else
  3286  		{
  3287  			/* Output a placeholder and record on the block's fixup list */
  3288  			/* The label is in the current function so we assume that the */
  3289  			/* displacement to the current instruction will be in the +-2GB range */
  3290  			x86_64_lea_membase_size(inst, $1, X86_64_RIP, 0, 8);
  3291  			fixup = (jit_int *)(inst - 4);
  3292  			if(block->fixup_list)
  3293  			{
  3294  				fixup[0] = _JIT_CALC_FIXUP(block->fixup_list, fixup);
  3295  			}
  3296  			block->fixup_list = (void *)fixup;
  3297  		}
  3298  	}
  3299  
  3300  /*
  3301   * Block operations.
  3302   */
  3303  
/*
 * JIT_OP_MEMCPY: copy $3 bytes from $2 to $1.  Patterns are tried in
 * order:
 *   1. constant size <= 0: emit nothing;
 *   2. constant size up to _JIT_MAX_MEMCPY_INLINE: small_block_copy()
 *      with one scratch general-purpose and one scratch xmm register;
 *   3. any other constant size: memory_copy();
 *   4. size only known at run time: call jit_memcpy() with the
 *      arguments pinned to rdi, rsi and rdx.
 */
JIT_OP_MEMCPY: ternary
	[any, any, imm, if("$3 <= 0")] -> { }
	[reg, reg, imm, scratch reg, scratch xreg,
		if("$3 <= _JIT_MAX_MEMCPY_INLINE")] -> {
		inst = small_block_copy(gen, inst, $1, 0, $2, 0, $3, $4, $5, 0);
	}
	[reg, reg, imm, clobber(creg), clobber(xreg)] -> {
		inst = memory_copy(gen, inst, $1, 0, $2, 0, $3);
	}
	[reg("rdi"), reg("rsi"), reg("rdx"), clobber(creg), clobber(xreg)] -> {
		inst = x86_64_call_code(inst, (jit_nint)jit_memcpy);
	}
  3316  
/*
 * JIT_OP_MEMSET: fill $3 bytes at $1 with the value $2.  Patterns are
 * tried in order:
 *   1. constant size <= 0: emit nothing;
 *   2. zero fill, inline-sized and a multiple of 16 bytes:
 *      small_block_set() with only a scratch xmm register;
 *   3. inline-sized and below 32 bytes: small_block_set() with only a
 *      scratch general-purpose register;
 *   4. any other inline-sized fill: small_block_set() with both kinds
 *      of scratch register;
 *   5. everything else: call jit_memset() with the arguments pinned to
 *      rdi, rsi and rdx.
 * NOTE(review): the meaning of the trailing small_block_set() flags is
 * not visible in this section -- confirm against its definition.
 */
JIT_OP_MEMSET: ternary
	[any, any, imm, if("$3 <= 0")] -> { }
	[reg, imm, imm, scratch xreg,
		if("$2 == 0 && $3 <= _JIT_MAX_MEMSET_INLINE && $3 % 16 == 0")] -> {
		inst = small_block_set(gen, inst, $1, 0, $2, $3, 0, $4, 0, 1);
	}
	[reg, imm, imm, scratch reg,
		if("$3 <= _JIT_MAX_MEMSET_INLINE && $3 < 32")] -> {
		inst = small_block_set(gen, inst, $1, 0, $2, $3, $4, 0, 0, 0);
	}
	[reg, imm, imm, scratch reg, scratch xreg,
		if("$3 <= _JIT_MAX_MEMSET_INLINE")] -> {
		inst = small_block_set(gen, inst, $1, 0, $2, $3, $4, $5, 0, 1);
	}
	[reg("rdi"), reg("rsi"), reg("rdx"), clobber(creg), clobber(xreg)] -> {
		inst = x86_64_call_code(inst, (jit_nint)jit_memset);
	}
  3334  
/*
 * JIT_OP_ALLOCA: allocate $1 bytes on the stack.  On entry $1 holds
 * the size; on exit it holds the new stack pointer value (possibly
 * adjusted further by fixup_alloca()).
 */
JIT_OP_ALLOCA:
	[reg] -> {
		x86_64_sub_reg_reg_size(inst, X86_64_RSP, $1, 8);
		x86_64_mov_reg_reg_size(inst, $1, X86_64_RSP, 8);
		inst = fixup_alloca(gen, inst, $1);
		/* Record that this function moves RSP dynamically */
		gen->stack_changed = 1;
}
  3342  
/*
 * JIT_OP_JUMP_TABLE: indexed jump through a table of label addresses.
 *
 * $1 is the index register, $2 the label array and $3 the number of
 * labels.  A table of $3 pointer-sized entries is allocated with
 * _jit_gen_alloc(); the emitted code jumps to table[$1] when the index
 * is in range and falls through otherwise.  Entries for blocks without
 * an address yet are chained on the block's absolute fixup list.
 * Position-independent code is not implemented (TODO()).
 */
JIT_OP_JUMP_TABLE: ternary, branch
	[reg, imm, imm, scratch reg, space("64 + sizeof(void *) * $3")] -> {
		unsigned char *patch_jump_table;
		unsigned char *patch_fall_through;
		int index;
		jit_label_t *labels;
		jit_nint num_labels;
		jit_block_t block;

		labels = (jit_label_t *) $2;
		num_labels = $3;

		patch_jump_table = (unsigned char *)_jit_gen_alloc(gen, sizeof(void *) * $3);
		if(!patch_jump_table)
		{
			/* The cache is full */
			return;
		}

		/* Scratch register holds the table base; out-of-range indexes
		   (unsigned compare) branch past the indirect jump */
		x86_64_mov_reg_imm_size(inst, $4, (jit_nint)patch_jump_table, 8);
		x86_64_cmp_reg_imm_size(inst, $1, num_labels, 8);
		patch_fall_through = inst;
		x86_branch32(inst, X86_CC_AE, 0, 0);

		if(func->builder->position_independent)
		{
			/* TODO */
			TODO();
		}
		else
		{
			/* jmp [table + index * 8] */
			x86_64_jmp_memindex(inst, $4, 0, $1, 3);
		}

		/* Fill the table, one 64-bit entry per label */
		for(index = 0; index < num_labels; index++)
		{
			block = jit_block_from_label(func, labels[index]);
			if(!block)
			{
				return;
			}

			if(func->builder->position_independent)
			{
				/* TODO */
				TODO();
			}
			else
			{
				if(block->address)
				{
					x86_64_imm_emit64(patch_jump_table, (jit_nint)(block->address));
				}
				else
				{
					/* Output a placeholder and record on the block's absolute fixup list */
					x86_64_imm_emit64(patch_jump_table, (jit_nint)(block->fixup_absolute_list));
					/* The "- 8" points back at the entry just written, which
					   implies the emit macro advanced patch_jump_table */
					block->fixup_absolute_list = (void *)(patch_jump_table - 8);
				}
			}
		}

		/* The out-of-range branch lands right after the indirect jump */
		x86_patch(patch_fall_through, inst);
	}