github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-rules-x86-64.c (about)

     1  /*
     2   * jit-rules-x86-64.c - Rules that define the characteristics of the x86_64.
     3   *
     4   * Copyright (C) 2008  Southern Storm Software, Pty Ltd.
     5   *
     6   * This file is part of the libjit library.
     7   *
     8   * The libjit library is free software: you can redistribute it and/or
     9   * modify it under the terms of the GNU Lesser General Public License
    10   * as published by the Free Software Foundation, either version 2.1 of
    11   * the License, or (at your option) any later version.
    12   *
    13   * The libjit library is distributed in the hope that it will be useful,
    14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    16   * Lesser General Public License for more details.
    17   *
    18   * You should have received a copy of the GNU Lesser General Public
    19   * License along with the libjit library.  If not, see
    20   * <http://www.gnu.org/licenses/>.
    21   */
    22  
    23  #include "jit-internal.h"
    24  #include "jit-rules.h"
    25  #include "jit-apply-rules.h"
    26  
    27  #if defined(JIT_BACKEND_X86_64)
    28  
    29  #include "jit-gen-x86-64.h"
    30  #include "jit-reg-alloc.h"
    31  #include "jit-setjmp.h"
    32  #include <stdio.h>
    33  
    34  /*
    35   * Pseudo register numbers for the x86_64 registers. These are not the
    36   * same as the CPU instruction register numbers.  The order of these
    37   * values must match the order in "JIT_REG_INFO".
    38   */
    39  #define	X86_64_REG_RAX		0
    40  #define	X86_64_REG_RCX		1
    41  #define	X86_64_REG_RDX		2
    42  #define	X86_64_REG_RBX		3
    43  #define	X86_64_REG_RSI		4
    44  #define	X86_64_REG_RDI		5
    45  #define	X86_64_REG_R8		6
    46  #define	X86_64_REG_R9		7
    47  #define	X86_64_REG_R10		8
    48  #define	X86_64_REG_R11		9
    49  #define	X86_64_REG_R12		10
    50  #define	X86_64_REG_R13		11
    51  #define	X86_64_REG_R14		12
    52  #define	X86_64_REG_R15		13
    53  #define	X86_64_REG_RBP		14
    54  #define	X86_64_REG_RSP		15
    55  #define	X86_64_REG_XMM0		16
    56  #define	X86_64_REG_XMM1		17
    57  #define	X86_64_REG_XMM2		18
    58  #define	X86_64_REG_XMM3		19
    59  #define	X86_64_REG_XMM4		20
    60  #define	X86_64_REG_XMM5		21
    61  #define	X86_64_REG_XMM6		22
    62  #define	X86_64_REG_XMM7		23
    63  #define	X86_64_REG_XMM8		24
    64  #define	X86_64_REG_XMM9		25
    65  #define	X86_64_REG_XMM10	26
    66  #define	X86_64_REG_XMM11	27
    67  #define	X86_64_REG_XMM12	28
    68  #define	X86_64_REG_XMM13	29
    69  #define	X86_64_REG_XMM14	30
    70  #define	X86_64_REG_XMM15	31
    71  #define	X86_64_REG_ST0		32
    72  #define	X86_64_REG_ST1		33
    73  #define	X86_64_REG_ST2		34
    74  #define	X86_64_REG_ST3		35
    75  #define	X86_64_REG_ST4		36
    76  #define	X86_64_REG_ST5		37
    77  #define	X86_64_REG_ST6		38
    78  #define	X86_64_REG_ST7		39
    79  
    80  /*
    81   * Determine if a pseudo register number is general, xmm or fpu.
    82   */
    83  #define	IS_GENERAL_REG(reg)	(((reg) & ~0x0f) == 0)
    84  #define	IS_XMM_REG(reg)		(((reg) & ~0x0f) == 0x10)
    85  #define	IS_FPU_REG(reg)		(((reg) & ~0x0f) == 0x20)
    86  
    87  /*
    88   * Scratch register, that is used for calls via register and
    89   * for loading the exception pc to the setjmp buffer.
    90   * This register *MUST* not be used for parameter passing and
    91   * *MUST* not be a callee saved register.
    92   * For SysV abi R11 is perfect.
    93   */
    94  #define X86_64_SCRATCH X86_64_R11
    95  
/*
 * Define this macro if the OS supports the SysV red zone.  (The code
 * tests it with #ifdef, so the value assigned is irrelevant — leave it
 * undefined to disable.)
 * The red zone is a 128 byte area below the stack pointer that is
 * guaranteed to be not modified by interrupts or signal handlers.
 * This allows us to use a temporary area on the stack without
 * having to modify the stack pointer, saving us two instructions.
 * TODO: Make this a configure switch.
 */
   104  #define HAVE_RED_ZONE 1
   105  
   106  /*
   107   * Some declarations that should be replaced by querying the cpuinfo
   108   * if generating code for the current cpu.
   109   */
   110  /*
   111  #define HAVE_X86_SSE_4_1 0
   112  #define HAVE_X86_SSE_4 0
   113  #define HAVE_X86_SSE_3 0
   114  #define HAVE_X86_FISTTP 0
   115  */
   116  
   117  #define	TODO() \
   118  do { \
   119  	fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
   120  } while(0)
   121  
   122  /*
   123   * Setup or teardown the x86 code output process.
   124   */
   125  #define	jit_cache_setup_output(needed)		\
   126  	unsigned char *inst = gen->ptr;		\
   127  	_jit_gen_check_space(gen, (needed))
   128  
   129  #define	jit_cache_end_output()	\
   130  	gen->ptr = inst
   131  
   132  /*
   133   * Set this to 1 for debugging fixups
   134   */
   135  #define DEBUG_FIXUPS 0
   136  
   137  /*
   138   * The maximum block size copied inline
   139   */
   140  #define _JIT_MAX_MEMCPY_INLINE	0x40
   141  
   142  /*
   143   * The maximum block size set inline
   144   */
   145  #define _JIT_MAX_MEMSET_INLINE 0x80
   146  
   147  /*
   148   * va_list type as specified in x86_64 sysv abi version 0.99
   149   * Figure 3.34
   150   */
typedef struct
{
	unsigned int gp_offset;		/* byte offset into reg_save_area of the next
								   unconsumed general purpose register argument */
	unsigned int fp_offset;		/* byte offset into reg_save_area of the next
								   unconsumed vector (xmm) register argument */
	void *overflow_arg_area;	/* pointer to the next stack-passed argument */
	void *reg_save_area;		/* start of the saved argument registers */
} _jit_va_list;
   158  
/* Registers used for INTEGER class arguments, in SysV ABI assignment order */
static int _jit_word_arg_regs[] = {X86_64_REG_RDI, X86_64_REG_RSI,
								   X86_64_REG_RDX, X86_64_REG_RCX,
								   X86_64_REG_R8, X86_64_REG_R9};
#define _jit_num_word_regs	6

/* Registers used for float arguments, in SysV ABI assignment order */
static int _jit_float_arg_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1,
									X86_64_REG_XMM2, X86_64_REG_XMM3,
									X86_64_REG_XMM4, X86_64_REG_XMM5,
									X86_64_REG_XMM6, X86_64_REG_XMM7};
#define _jit_num_float_regs	8

/* Registers used for returning INTEGER values (first, second word) */
static int _jit_word_return_regs[] = {X86_64_REG_RAX, X86_64_REG_RDX};
#define _jit_num_word_return_regs	2

/* Registers used for returning sse values (first, second value) */
static int _jit_sse_return_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1};
#define _jit_num_sse_return_regs	2
   179  
/*
 * X86_64 register classes, created by _jit_init_backend below.
 */
static _jit_regclass_t *x86_64_reg;		/* X86_64 general purpose registers */
static _jit_regclass_t *x86_64_creg;	/* X86_64 call clobbered general */
										/* purpose registers */
static _jit_regclass_t *x86_64_dreg;	/* general purpose registers that */
										/* can be used as divisor */
										/* (all but %rax and %rdx) */
static _jit_regclass_t *x86_64_rreg;	/* general purpose registers not used */
										/* for returning values */
static _jit_regclass_t *x86_64_sreg;	/* general purpose registers that can */
										/* be used for the value to be */
										/* shifted (all but %rcx) */
static _jit_regclass_t *x86_64_freg;	/* X86_64 fpu registers */
static _jit_regclass_t *x86_64_xreg;	/* X86_64 xmm registers */
   197  
/*
 * Create the register classes declared above and register them with
 * the register allocator.  Must run once before code generation.
 */
void
_jit_init_backend(void)
{
	/* register class with all allocatable general purpose registers */
	x86_64_reg = _jit_regclass_create(
		"reg", JIT_REG_WORD | JIT_REG_LONG, 14,
		X86_64_REG_RAX, X86_64_REG_RCX,
		X86_64_REG_RDX, X86_64_REG_RBX,
		X86_64_REG_RSI, X86_64_REG_RDI,
		X86_64_REG_R8, X86_64_REG_R9,
		X86_64_REG_R10, X86_64_REG_R11,
		X86_64_REG_R12, X86_64_REG_R13,
		X86_64_REG_R14, X86_64_REG_R15);

	/* register class with all call clobbered registers */
	x86_64_creg = _jit_regclass_create(
		"creg", JIT_REG_WORD | JIT_REG_LONG, 9,
		X86_64_REG_RAX, X86_64_REG_RCX,
		X86_64_REG_RDX, X86_64_REG_RSI,
		X86_64_REG_RDI, X86_64_REG_R8,
		X86_64_REG_R9, X86_64_REG_R10,
		X86_64_REG_R11);

	/* register class for divisors (all but %rax and %rdx) */
	x86_64_dreg = _jit_regclass_create(
		"dreg", JIT_REG_WORD | JIT_REG_LONG, 12,
		X86_64_REG_RCX, X86_64_REG_RBX,
		X86_64_REG_RSI, X86_64_REG_RDI,
		X86_64_REG_R8, X86_64_REG_R9,
		X86_64_REG_R10, X86_64_REG_R11,
		X86_64_REG_R12, X86_64_REG_R13,
		X86_64_REG_R14, X86_64_REG_R15);

	/* register class with all registers not used for returning values */
	x86_64_rreg = _jit_regclass_create(
		"rreg", JIT_REG_WORD | JIT_REG_LONG, 12,
		X86_64_REG_RCX, X86_64_REG_RBX,
		X86_64_REG_RSI, X86_64_REG_RDI,
		X86_64_REG_R8, X86_64_REG_R9,
		X86_64_REG_R10, X86_64_REG_R11,
		X86_64_REG_R12, X86_64_REG_R13,
		X86_64_REG_R14, X86_64_REG_R15);

	/* register class with all registers that can be used for shifted
	   values (all but %rcx, which holds the shift count) */
	x86_64_sreg = _jit_regclass_create(
		"sreg", JIT_REG_WORD | JIT_REG_LONG, 13,
		X86_64_REG_RAX, X86_64_REG_RDX,
		X86_64_REG_RBX, X86_64_REG_RSI,
		X86_64_REG_RDI, X86_64_REG_R8,
		X86_64_REG_R9, X86_64_REG_R10,
		X86_64_REG_R11, X86_64_REG_R12,
		X86_64_REG_R13, X86_64_REG_R14,
		X86_64_REG_R15);

	/* x87 fpu stack registers st(0)..st(7) */
	x86_64_freg = _jit_regclass_create(
		"freg", JIT_REG_X86_64_FLOAT | JIT_REG_IN_STACK, 8,
		X86_64_REG_ST0, X86_64_REG_ST1,
		X86_64_REG_ST2, X86_64_REG_ST3,
		X86_64_REG_ST4, X86_64_REG_ST5,
		X86_64_REG_ST6, X86_64_REG_ST7);

	/* xmm registers used for float32/float64 values */
	x86_64_xreg = _jit_regclass_create(
		"xreg", JIT_REG_FLOAT32 | JIT_REG_FLOAT64, 16,
		X86_64_REG_XMM0, X86_64_REG_XMM1,
		X86_64_REG_XMM2, X86_64_REG_XMM3,
		X86_64_REG_XMM4, X86_64_REG_XMM5,
		X86_64_REG_XMM6, X86_64_REG_XMM7,
		X86_64_REG_XMM8, X86_64_REG_XMM9,
		X86_64_REG_XMM10, X86_64_REG_XMM11,
		X86_64_REG_XMM12, X86_64_REG_XMM13,
		X86_64_REG_XMM14, X86_64_REG_XMM15);
}
   269  
/*
 * Determine whether the rule-based code generator supports "opcode".
 * The generated include expands to case labels for every supported
 * opcode (presumably each returning nonzero — see the generated
 * "jit-rules-x86-64.inc"); anything not matched falls through to
 * the default return of 0.
 */
int
_jit_opcode_is_supported(int opcode)
{
	switch(opcode)
	{
	#define JIT_INCLUDE_SUPPORTED
	#include "jit-rules-x86-64.inc"
	#undef JIT_INCLUDE_SUPPORTED
	}
	return 0;
}
   281  
/*
 * Route the target address of an indirect call into %r11.
 * R11 is the designated scratch register (see X86_64_SCRATCH above):
 * it is neither a parameter register nor callee saved, so it is
 * still live when the call instruction is finally emitted.
 */
int
_jit_setup_indirect_pointer(jit_function_t func, jit_value_t value)
{
	return jit_insn_outgoing_reg(func, value, X86_64_REG_R11);
}
   287  
   288  /*
   289   * Do a xmm operation with a constant float32 value
   290   */
static int
_jit_xmm1_reg_imm_size_float32(jit_gencode_t gen, unsigned char **inst_ptr,
			       X86_64_XMM1_OP opc, int reg,
			       jit_float32 *float32_value)
{
	void *ptr;			/* copy of the constant in generator-owned memory */
	jit_nint offset;	/* rip-relative displacement to that copy */
	unsigned char *inst;

	inst = *inst_ptr;
	/* Materialize the float32 constant in memory allocated from the
	   code generator so the instruction can reference it */
	ptr = _jit_gen_alloc(gen, sizeof(jit_float32));
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, float32_value, sizeof(jit_float32));

	/* Displacement is relative to the end of the instruction about to
	   be emitted: 8 bytes, or 9 when reg > 7 (presumably the extra REX
	   prefix byte needed for %xmm8..%xmm15 — matches the float64 twin) */
	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_xmm1_reg_membase(inst, opc, reg,
									 X86_64_RIP, offset, 0);
	}
	else if(((jit_nint)ptr >= jit_min_int) &&
			((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_xmm1_reg_mem(inst, opc, reg, (jit_nint)ptr, 0);
	}
	else
	{
		/* We have to use an extra general register */
		TODO();
		return 0;
	}
	/* Success: advance the caller's instruction pointer */
	*inst_ptr = inst;
	return 1;
}
   330  
   331  /*
   332   * Do a xmm operation with a constant float64 value
   333   */
static int
_jit_xmm1_reg_imm_size_float64(jit_gencode_t gen, unsigned char **inst_ptr,
			       X86_64_XMM1_OP opc, int reg,
			       jit_float64 *float64_value)
{
	void *ptr;			/* copy of the constant in generator-owned memory */
	jit_nint offset;	/* rip-relative displacement to that copy */
	unsigned char *inst;

	inst = *inst_ptr;
	/* Materialize the float64 constant in memory allocated from the
	   code generator so the instruction can reference it */
	ptr = _jit_gen_alloc(gen, sizeof(jit_float64));
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, float64_value, sizeof(jit_float64));

	/* Displacement relative to the end of the emitted instruction:
	   8 bytes, or 9 when reg > 7 (extra REX prefix byte, presumably —
	   same length math as the float32 variant above) */
	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_xmm1_reg_membase(inst, opc, reg,
									 X86_64_RIP, offset, 1);
	}
	else if(((jit_nint)ptr >= jit_min_int) &&
			((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_xmm1_reg_mem(inst, opc, reg, (jit_nint)ptr, 1);
	}
	else
	{
		/* We have to use an extra general register */
		TODO();
		return 0;
	}
	/* Success: advance the caller's instruction pointer */
	*inst_ptr = inst;
	return 1;
}
   373  
   374  /*
   375   * Do a logical xmm operation with packed float32 values
   376   */
static int
_jit_plops_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
				   X86_64_XMM_PLOP opc, int reg, void *packed_value)
{
	void *ptr;			/* 16 byte copy of the packed operand */
	jit_nint offset;	/* rip-relative displacement to that copy */
	unsigned char *inst;

	inst = *inst_ptr;
	/* The packed operand is 16 bytes (a full xmm register) */
	ptr = _jit_gen_alloc(gen, 16);
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, packed_value, 16);

	/* calculate the offset for membase addressing, relative to the end
	   of the instruction about to be emitted: 7 bytes, or 8 when
	   reg > 7 (presumably the extra REX prefix byte) */
	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 8 : 7));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_plops_reg_membase(inst, opc, reg, X86_64_RIP, offset);
		*inst_ptr = inst;
		return 1;
	}
	/* Check if mem addressing can be used */
	if(((jit_nint)ptr >= jit_min_int) &&
		((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_plops_reg_mem(inst, opc, reg, (jit_nint)ptr);
		*inst_ptr = inst;
		return 1;
	}
	/* We have to use an extra general register */
	TODO();
	return 0;
}
   415  
   416  /*
   417   * Do a logical xmm operation with packed float64 values
   418   */
static int
_jit_plopd_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
				   X86_64_XMM_PLOP opc, int reg, void *packed_value)
{
	void *ptr;			/* 16 byte copy of the packed operand */
	jit_nint offset;	/* rip-relative displacement to that copy */
	unsigned char *inst;

	inst = *inst_ptr;
	/* The packed operand is 16 bytes (a full xmm register) */
	ptr = _jit_gen_alloc(gen, 16);
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, packed_value, 16);

	/* calculate the offset for membase addressing, relative to the end
	   of the instruction about to be emitted: 8 bytes, or 9 when
	   reg > 7 — one byte longer than the plops variant, presumably
	   because of the double-precision opcode prefix */
	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_plopd_reg_membase(inst, opc, reg, X86_64_RIP, offset);
		*inst_ptr = inst;
		return 1;
	}
	/* Check if mem addressing can be used */
	if(((jit_nint)ptr >= jit_min_int) &&
		((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_plopd_reg_mem(inst, opc, reg, (jit_nint)ptr);
		*inst_ptr = inst;
		return 1;
	}
	/* We have to use an extra general register */
	TODO();
	return 0;
}
   457  
   458  /*
   459   * Helpers for saving and setting roundmode in the fpu control word
   460   * and restoring it afterwards.
   461   * The rounding mode bits are bit 10 and 11 in the fpu control word.
   462   * sp_offset is the start offset of a temporary eight byte block.
   463   */
static unsigned char *
_x86_64_set_fpu_roundmode(unsigned char *inst, int scratch_reg,
						  int sp_offset, X86_64_ROUNDMODE mode)
{
	/* Layout of the eight byte scratch block at %rsp + sp_offset:
	   bytes 0..3 hold the modified control word that gets loaded,
	   bytes 4..7 hold the original control word, which
	   _x86_64_restore_fpcw later reloads from the same sp_offset. */
	int fpcw_save_offset = sp_offset + 4;
	int fpcw_new_offset = sp_offset;
	/* the rounding mode occupies bits 10 and 11 of the control word */
	int round_mode = ((int)mode) << 10;
	int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 10);

	/* store FPU control word */
	x86_64_fnstcw_membase(inst, X86_64_RSP, fpcw_save_offset);
	/* load the value into the scratch register */
	x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP, fpcw_save_offset, 2);
	/* Set the rounding mode */
	if(mode != X86_ROUND_ZERO)
	{
		/* Not all bits are set in the mask so we have to clear the
		   rounding field first (for X86_ROUND_ZERO the OR below
		   sets the whole field, so no clearing is needed) */
		x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 2);
	}
	x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 2);
	/* Store the new round mode */
	x86_64_mov_membase_reg_size(inst, X86_64_RSP, fpcw_new_offset, scratch_reg, 2);
	/* Now load the new control word */
	x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_new_offset);

	return inst;
}
   491  
   492  static unsigned char *
   493  _x86_64_restore_fpcw(unsigned char *inst, int sp_offset)
   494  {
   495  	int fpcw_save_offset = sp_offset + 4;
   496  
   497  	/* Now load the saved control word */
   498  	x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_save_offset);
   499  
   500  	return inst;
   501  }
   502  
   503  /*
   504   * Helpers for saving and setting roundmode in the mxcsr register and
   505   * restoring it afterwards.
   506   * The rounding mode bits are bit 13 and 14 in the mxcsr register.
   507   * sp_offset is the start offset of a temporary eight byte block.
   508   */
static unsigned char *
_x86_64_set_xmm_roundmode(unsigned char *inst, int scratch_reg,
						  int sp_offset, X86_64_ROUNDMODE mode)
{
	/* Layout of the eight byte scratch block at %rsp + sp_offset:
	   bytes 0..3 hold the modified mxcsr value that gets loaded,
	   bytes 4..7 hold the original value, which _x86_64_restore_mxcsr
	   later reloads from the same sp_offset. */
	int mxcsr_save_offset = sp_offset + 4;
	int mxcsr_new_offset = sp_offset;
	/* the rounding mode occupies bits 13 and 14 of the mxcsr register */
	int round_mode = ((int)mode) << 13;
	int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 13);

	/* save the mxcsr register */
	x86_64_stmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
	/* Load the contents of the mxcsr register into the scratch register */
	x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP, mxcsr_save_offset, 4);
	/* Set the rounding mode */
	if(mode != X86_ROUND_ZERO)
	{
		/* Not all bits are set in the mask so we have to clear the
		   rounding field first (for X86_ROUND_ZERO the OR below
		   sets the whole field, so no clearing is needed) */
		x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 4);
	}
	x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 4);
	/* Store the new round mode */
	x86_64_mov_membase_reg_size(inst, X86_64_RSP, mxcsr_new_offset, scratch_reg, 4);
	/* and load it to the mxcsr register */
	x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_new_offset);

	return inst;
}
   536  
   537  static unsigned char *
   538  _x86_64_restore_mxcsr(unsigned char *inst, int sp_offset)
   539  {
   540  	int mxcsr_save_offset = sp_offset + 4;
   541  
   542  	/* restore the mxcsr register */
   543  	x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
   544  
   545  	return inst;
   546  }
   547  
/*
 * Perform rounding of scalar single precision values.
 * We have to use the fpu where SSE 4.1 is not supported.
 */
static unsigned char *
x86_64_rounds_reg_reg(unsigned char *inst, int dreg, int sreg,
					  int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
#else
	/* No SSE4.1: round via the x87 fpu using the red zone as scratch:
	   [%rsp - 16] holds the float value, [%rsp - 8] the fpu control
	   word pair (see _x86_64_set_fpu_roundmode) */
	/* Copy the xmm register to the stack */
	x86_64_movss_membase_reg(inst, X86_64_RSP, -16, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RSP, -16, 4);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
	x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
#else
	/* No red zone: explicitly reserve 16 bytes below %rsp instead */
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Copy the xmm register to the stack */
	x86_64_movss_regp_reg(inst, X86_64_RSP, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* Load the value to the fpu */
	x86_64_fld_regp_size(inst, X86_64_RSP, 4);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
	x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}
   599  
/*
 * Same as x86_64_rounds_reg_reg but the source operand is a float32
 * in the stack frame at %rbp + offset instead of an xmm register.
 */
static unsigned char *
x86_64_rounds_reg_membase(unsigned char *inst, int dreg, int offset,
						  int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* No SSE4.1: round via the x87 fpu, red zone used as scratch */
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
	x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* No red zone: explicitly reserve 16 bytes below %rsp instead */
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
	x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}
   643  
/*
 * Perform rounding of scalar double precision values.
 * We have to use the fpu where SSE 4.1 is not supported.
 */
static unsigned char *
x86_64_roundd_reg_reg(unsigned char *inst, int dreg, int sreg,
					  int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
#else
	/* No SSE4.1: round via the x87 fpu using the red zone as scratch:
	   [%rsp - 16] holds the double value, [%rsp - 8] the fpu control
	   word pair (see _x86_64_set_fpu_roundmode) */
	/* Copy the xmm register to the stack */
	x86_64_movsd_membase_reg(inst, X86_64_RSP, -16, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RSP, -16, 8);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
	x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
#else
	/* No red zone: explicitly reserve 16 bytes below %rsp instead */
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Copy the xmm register to the stack */
	x86_64_movsd_regp_reg(inst, X86_64_RSP, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* Load the value to the fpu */
	x86_64_fld_regp_size(inst, X86_64_RSP, 8);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
	x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}
   695  
/*
 * Same as x86_64_roundd_reg_reg but the source operand is a float64
 * in the stack frame at %rbp + offset instead of an xmm register.
 */
static unsigned char *
x86_64_roundd_reg_membase(unsigned char *inst, int dreg, int offset,
						  int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* No SSE4.1: round via the x87 fpu, red zone used as scratch */
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
	x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* No red zone: explicitly reserve 16 bytes below %rsp instead */
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
	x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}
   739  
   740  /*
   741   * Round the value in St(0) to integer according to the rounding
   742   * mode specified.
   743   */
static unsigned char *
x86_64_roundnf(unsigned char *inst, int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
	/* Only the eight byte control word block is needed here; the
	   value stays on the fpu stack, so the red zone at [%rsp - 8]
	   suffices */
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
#else
	/* allocate an eight byte block on the stack for the two copies
	   of the fpu control word */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 0, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 0);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
	return inst;
}
   768  
   769  /*
   770   * Round the value in the fpu register st(0) to integer and
   771   * store the value in dreg. St(0) is popped from the fpu stack.
   772   */
/*
 * NOTE(review): the FISTTP paths below hardcode a 4 byte result and
 * ignore the "size" parameter, while the FISTP paths honor it —
 * confirm whether 8 byte conversions are required on those paths.
 * (HAVE_X86_FISTTP is currently commented out above, so the FISTP
 * paths are the ones compiled.)
 */
static unsigned char *
x86_64_nfloat_to_int(unsigned char *inst, int dreg, int scratch_reg, int size)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_FISTTP
	/* convert float to int (fisttp always truncates, so no control
	   word manipulation is needed) */
	x86_64_fisttp_membase_size(inst, X86_64_RSP, -8, 4);
	/* move result to the destination */
	x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -8, 4);
#else
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, X86_ROUND_ZERO);
	/* And round the value in st(0) to integer and store it on the stack */
	x86_64_fistp_membase_size(inst, X86_64_RSP, -16, size);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and load the integer to the destination register */
	x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -16, size);
#endif
#else
#ifdef HAVE_X86_FISTTP
	/* allocate space on the stack for one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
	/* convert float to int */
	x86_64_fisttp_regp_size(inst, X86_64_RSP, 4);
	/* move result to the destination */
	x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, 4);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#else
	/* allocate space on the stack for 2 ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, X86_ROUND_ZERO);
	/* And round the value in st(0) to integer and store it on the stack */
	x86_64_fistp_regp_size(inst, X86_64_RSP, size);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and load the integer to the destination register */
	x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, size);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}
   819  
   820  /*
   821   * Call a function
   822   */
static unsigned char *
x86_64_call_code(unsigned char *inst, jit_nint func)
{
	jit_nint offset;

	/* NOTE(review): loads the immediate 8 into %eax before every call.
	   Presumably this is to satisfy the SysV varargs convention where
	   %al carries an upper bound of the number of vector registers
	   used (non-variadic callees ignore %rax) — verify against
	   upstream libjit, which this fork diverges from here. */
	x86_64_mov_reg_imm_size(inst, X86_64_RAX, 8, 4);
	/* displacement relative to the end of a 5 byte call rel32 */
	offset = func - ((jit_nint)inst + 5);
	if(offset >= jit_min_int && offset <= jit_max_int)
	{
		/* We can use the immediate call */
		x86_64_call_imm(inst, offset);
	}
	else
	{
		/* Target out of rel32 range: we have to do a call via register */
		x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, func, 8);
		x86_64_call_reg(inst, X86_64_SCRATCH);
	}
	return inst;
}
   843  
   844  /*
   845   * Jump to a function
   846   */
   847  static unsigned char *
   848  x86_64_jump_to_code(unsigned char *inst, jit_nint func)
   849  {
   850  	jit_nint offset;
   851  
   852  	offset = func - ((jit_nint)inst + 5);
   853  	if(offset >= jit_min_int && offset <= jit_max_int)
   854  	{
   855  		/* We can use the immediate call */
   856  		x86_64_jmp_imm(inst, offset);
   857  	}
   858  	else
   859  	{
   860  		/* We have to do a call via register */
   861  		x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, func, 8);
   862  		x86_64_jmp_reg(inst, X86_64_SCRATCH);
   863  	}
   864  	return inst;
   865  }
   866  
   867  /*
   868   * Throw a builtin exception.
   869   */
static unsigned char *
throw_builtin(unsigned char *inst, jit_function_t func, int type)
{
	/* We need to update "catch_pc" if we have a "try" block */
	if(func->builder->setjmp_value != 0)
	{
		_jit_gen_fix_value(func->builder->setjmp_value);

		/* lea rdi, [rip+0] yields the address of the next instruction,
		   which is then stored as the catch pc inside the setjmp buffer
		   in the frame */
		x86_64_lea_membase_size(inst, X86_64_RDI, X86_64_RIP, 0, 8);
		x86_64_mov_membase_reg_size(inst, X86_64_RBP,
					func->builder->setjmp_value->frame_offset
					+ jit_jmp_catch_pc_offset, X86_64_RDI, 8);
	}

	/* Load the exception type into RDI (the first argument register) */
	x86_64_mov_reg_imm_size(inst, X86_64_RDI, type, 4);

	/* Call the "jit_exception_builtin" function, which will never return */
	return x86_64_call_code(inst, (jit_nint)jit_exception_builtin);
}
   890  
   891  /*
   892   * spill a register to it's place in the current stack frame.
   893   * The argument type must be in it's normalized form.
   894   */
static void
_spill_reg(unsigned char **inst_ptr, jit_type_t type,
		   jit_int reg, jit_int offset)
{
	unsigned char *inst = *inst_ptr;

	if(IS_GENERAL_REG(reg))
	{
		switch(type->kind)
		{
#if 0
			case JIT_TYPE_SBYTE:
			case JIT_TYPE_UBYTE:
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
											_jit_reg_info[reg].cpu_reg, 1);
			}
			break;

			case JIT_TYPE_SHORT:
			case JIT_TYPE_USHORT:
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
											_jit_reg_info[reg].cpu_reg, 2);
			}
			break;
#else
			/* The byte/short sized stores above are disabled: sub-int
			   types are spilled with a full 32-bit store instead */
			case JIT_TYPE_SBYTE:
			case JIT_TYPE_UBYTE:
			case JIT_TYPE_SHORT:
			case JIT_TYPE_USHORT:
#endif
			case JIT_TYPE_INT:
			case JIT_TYPE_UINT:
			case JIT_TYPE_FLOAT32:
			{
				/* 32-bit store to [rbp + offset] */
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
											_jit_reg_info[reg].cpu_reg, 4);
			}
			break;

			case JIT_TYPE_LONG:
			case JIT_TYPE_ULONG:
			case JIT_TYPE_FLOAT64:
			{
				/* 64-bit store to [rbp + offset] */
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
											_jit_reg_info[reg].cpu_reg, 8);
			}
			break;

			case JIT_TYPE_STRUCT:
			case JIT_TYPE_UNION:
			{
				/* A small aggregate held in a general register is stored
				   with the smallest store that covers its size */
				jit_nuint size = jit_type_get_size(type);

				if(size == 1)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
												_jit_reg_info[reg].cpu_reg, 1);
				}
				else if(size == 2)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
												_jit_reg_info[reg].cpu_reg, 2);
				}
				else if(size <= 4)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
												_jit_reg_info[reg].cpu_reg, 4);
				}
				else
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
												_jit_reg_info[reg].cpu_reg, 8);
				}
			}
		}
	}
	else if(IS_XMM_REG(reg))
	{
		switch(type->kind)
		{
			case JIT_TYPE_FLOAT32:
			{
				x86_64_movss_membase_reg(inst, X86_64_RBP, offset,
										 _jit_reg_info[reg].cpu_reg);
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				x86_64_movsd_membase_reg(inst, X86_64_RBP, offset,
										 _jit_reg_info[reg].cpu_reg);
			}
			break;

			case JIT_TYPE_STRUCT:
			case JIT_TYPE_UNION:
			{
				/* Aggregate in an xmm register: pick the store width
				   from the aggregate's size */
				jit_nuint size = jit_type_get_size(type);

				if(size <= 4)
				{
					x86_64_movss_membase_reg(inst, X86_64_RBP, offset,
											 _jit_reg_info[reg].cpu_reg);
				}
				else if(size <= 8)
				{
					x86_64_movsd_membase_reg(inst, X86_64_RBP, offset,
											 _jit_reg_info[reg].cpu_reg);
				}
				else
				{
					/* Larger than 8 bytes: full 16-byte store.  movaps
					   (aligned form) only when the type's alignment is a
					   multiple of 16, movups otherwise */
					jit_nint alignment = jit_type_get_alignment(type);

					if((alignment & 0xf) == 0)
					{
						x86_64_movaps_membase_reg(inst, X86_64_RBP, offset,
												  _jit_reg_info[reg].cpu_reg);
					}
					else
					{
						x86_64_movups_membase_reg(inst, X86_64_RBP, offset,
												  _jit_reg_info[reg].cpu_reg);
					}
				}
			}
			break;
		}
	}
	else if(IS_FPU_REG(reg))
	{
		/* fstp stores and pops st(0); NOTE(review): this assumes the
		   value to spill is on top of the x87 stack — confirm against
		   the register allocator's stack handling */
		switch(type->kind)
		{
			case JIT_TYPE_FLOAT32:
			{
				x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 4);
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
			}
			break;

			case JIT_TYPE_NFLOAT:
			{
				/* 80-bit extended store unless jit_nfloat is just a
				   double on this platform */
				if(sizeof(jit_nfloat) == sizeof(jit_float64))
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
				}
				else
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 10);
				}
			}
			break;
		}
	}

	/* Write the current instruction pointer back */
	*inst_ptr = inst;
}
  1059  
  1060  void
  1061  _jit_gen_fix_value(jit_value_t value)
  1062  {
  1063  	if(!(value->has_frame_offset) && !(value->is_constant))
  1064  	{
  1065  		jit_nuint alignment = jit_type_get_alignment(value->type);
  1066  		jit_nint size =jit_type_get_size(value->type);
  1067  		jit_nint frame_size = value->block->func->builder->frame_size;
  1068  
  1069  		/* Round the size to a multiple of the stack item size */
  1070  		size = (jit_nint)(ROUND_STACK(size));
  1071  
  1072  		/* Add the size to the existing local items */
  1073  		frame_size += size;
  1074  
  1075  		/* Align the new frame_size for the value */
  1076  		frame_size = (frame_size + (alignment - 1)) & ~(alignment - 1);
  1077  
  1078  		value->block->func->builder->frame_size = frame_size;
  1079  		value->frame_offset = -frame_size;
  1080  		value->has_frame_offset = 1;
  1081  	}
  1082  }
  1083  
  1084  void
  1085  _jit_gen_spill_global(jit_gencode_t gen, int reg, jit_value_t value)
  1086  {
  1087  	jit_cache_setup_output(16);
  1088  	if(value)
  1089  	{
  1090  		jit_type_t type = jit_type_normalize(value->type);
  1091  
  1092  		_jit_gen_fix_value(value);
  1093  
  1094  		_spill_reg(&inst, type, value->global_reg, value->frame_offset);
  1095  	}
  1096  	else
  1097  	{
  1098  		x86_64_push_reg_size(inst, _jit_reg_info[reg].cpu_reg, 8);
  1099  	}
  1100  	jit_cache_end_output();
  1101  }
  1102  
  1103  void
  1104  _jit_gen_load_global(jit_gencode_t gen, int reg, jit_value_t value)
  1105  {
  1106  	jit_cache_setup_output(16);
  1107  	if(value)
  1108  	{
  1109  		x86_64_mov_reg_membase_size(inst,
  1110  			_jit_reg_info[value->global_reg].cpu_reg,
  1111  			X86_64_RBP, value->frame_offset, 8);
  1112  	}
  1113  	else
  1114  	{
  1115  		x86_64_pop_reg_size(inst, _jit_reg_info[reg].cpu_reg, 8);
  1116  	}
  1117  	jit_cache_end_output();
  1118  }
  1119  
  1120  void
  1121  _jit_gen_spill_reg(jit_gencode_t gen, int reg,
  1122  				   int other_reg, jit_value_t value)
  1123  {
  1124  	jit_type_t type;
  1125  
  1126  	/* Make sure that we have sufficient space */
  1127  	jit_cache_setup_output(16);
  1128  
  1129  	/* If the value is associated with a global register, then copy to that */
  1130  	if(value->has_global_register)
  1131  	{
  1132  		reg = _jit_reg_info[reg].cpu_reg;
  1133  		other_reg = _jit_reg_info[value->global_reg].cpu_reg;
  1134  		x86_64_mov_reg_reg_size(inst, other_reg, reg, sizeof(void *));
  1135  		jit_cache_end_output();
  1136  		return;
  1137  	}
  1138  
  1139  	/* Fix the value in place within the local variable frame */
  1140  	_jit_gen_fix_value(value);
  1141  
  1142  	/* Get the normalized type */
  1143  	type = jit_type_normalize(value->type);
  1144  
  1145  	/* and spill the register */
  1146  	_spill_reg(&inst, type, reg, value->frame_offset);
  1147  
  1148  	/* End the code output process */
  1149  	jit_cache_end_output();
  1150  }
  1151  
  1152  void
  1153  _jit_gen_free_reg(jit_gencode_t gen, int reg,
  1154  		  int other_reg, int value_used)
  1155  {
  1156  	/* We only need to take explicit action if we are freeing a
  1157  	   floating-point register whose value hasn't been used yet */
  1158  	if(!value_used && IS_FPU_REG(reg))
  1159  	{
  1160  		_jit_gen_check_space(gen, 2);
  1161  		x86_fstp(gen->ptr, reg - X86_64_REG_ST0);
  1162  	}
  1163  }
  1164  
  1165  /*
  1166   * Set a register value based on a condition code.
  1167   */
static unsigned char *
setcc_reg(unsigned char *inst, int reg, int cond, int is_signed)
{
	/* Use a SETcc instruction if we have a basic register */
	x86_64_set_reg(inst, cond, reg, is_signed);
	/* SETcc only writes the low byte; zero-extend so the full 32-bit
	   register holds exactly 0 or 1 */
	x86_64_movzx8_reg_reg_size(inst, reg, reg, 4);
	return inst;
}
  1176  
  1177  /*
  1178   * Helper macros for fixup handling.
  1179   *
  1180   * We have only 4 bytes for the jump offsets.
  1181   * Therefore we have do something tricky here.
  1182   * The fixup pointer in the block/gen points to the last fixup.
  1183   * The fixup itself contains the offset to the previous fixup or
  1184   * null if it's the last fixup in the list.
  1185   */
  1186  
  1187  /*
  1188   * Calculate the fixup value
  1189   * This is the value stored as placeholder in the instruction.
  1190   */
/* Byte distance from the previous fixup site to the current emit
   position, truncated to 32 bits to fit the placeholder field. */
#define _JIT_CALC_FIXUP(fixup_list, inst) \
	((jit_int)((jit_nint)(inst) - (jit_nint)(fixup_list)))

/*
 * Calculate the pointer to the fixup value.
 * Evaluates to 0 when "fixup" is 0, i.e. at the end of the chain.
 */
#define _JIT_CALC_NEXT_FIXUP(fixup_list, fixup) \
	((fixup) ? ((jit_nint)(fixup_list) - (jit_nint)(fixup)) : (jit_nint)0)
  1199  
  1200  /*
  1201   * Get the long form of a branch opcode.
  1202   */
  1203  static int
  1204  long_form_branch(int opcode)
  1205  {
  1206  	if(opcode == 0xEB)
  1207  	{
  1208  		return 0xE9;
  1209  	}
  1210  	else
  1211  	{
  1212  		return opcode + 0x0F10;
  1213  	}
  1214  }
  1215  
  1216  /*
  1217   * Output a branch instruction.
  1218   */
static unsigned char *
output_branch(jit_function_t func, unsigned char *inst, int opcode,
	      jit_insn_t insn)
{
	jit_block_t block;

	if((insn->flags & JIT_INSN_VALUE1_IS_LABEL) != 0)
	{
		/* "address_of_label" instruction */
		block = jit_block_from_label(func, (jit_label_t)(insn->value1));
	}
	else
	{
		/* Normal branch: the target label is the instruction's dest */
		block = jit_block_from_label(func, (jit_label_t)(insn->dest));
	}
	if(!block)
	{
		/* Unknown label: emit nothing */
		return inst;
	}
	if(block->address)
	{
		jit_nint offset;

		/* We already know the address of the block (backwards branch).
		   The rel8 displacement is measured from the end of the 2-byte
		   short form */
		offset = ((unsigned char *)(block->address)) - (inst + 2);
		if(x86_is_imm8(offset))
		{
			/* We can output a short-form backwards branch */
			*inst++ = (unsigned char)opcode;
			*inst++ = (unsigned char)offset;
		}
		else
		{
			/* We need to output a long-form backwards branch.
			   The single-opcode long form is 3 bytes longer than the
			   short form assumed above; the 0x0F-prefixed form adds
			   one more byte (hence --offset below) */
			offset -= 3;
			opcode = long_form_branch(opcode);
			if(opcode < 256)
			{
				*inst++ = (unsigned char)opcode;
			}
			else
			{
				/* Two-byte 0x0F 8x conditional branch */
				*inst++ = (unsigned char)(opcode >> 8);
				*inst++ = (unsigned char)opcode;
				--offset;
			}
			x86_imm_emit32(inst, offset);
		}
	}
	else
	{
		jit_int fixup;

		/* Forward branch: output a placeholder and record it on the
		   block's fixup list.  The 4 placeholder bytes chain to the
		   previous fixup (0 terminates the chain); see the fixup
		   handling notes above */
		opcode = long_form_branch(opcode);
		if(opcode < 256)
		{
			*inst++ = (unsigned char)opcode;
		}
		else
		{
			*inst++ = (unsigned char)(opcode >> 8);
			*inst++ = (unsigned char)opcode;
		}
		if(block->fixup_list)
		{
			fixup = _JIT_CALC_FIXUP(block->fixup_list, inst);
		}
		else
		{
			fixup = 0;
		}
		block->fixup_list = (void *)inst;
		x86_imm_emit32(inst, fixup);

		if(DEBUG_FIXUPS)
		{
			fprintf(stderr,
					"Block: %lx, Current Fixup: %lx, Next fixup: %lx\n",
					(jit_nint)block, (jit_nint)(block->fixup_list),
					(jit_nint)fixup);
		}
	}
	return inst;
}
  1304  
  1305  /*
  1306   * Jump to the current function's epilog.
  1307   */
static unsigned char *
jump_to_epilog(jit_gencode_t gen, unsigned char *inst, jit_block_t block)
{
	jit_int fixup;

	/* If the epilog is the next thing that we will output,
	   then fall through to the epilog directly */
	if(_jit_block_is_final(block))
	{
		return inst;
	}

	/* Output a placeholder for the jump and add it to the fixup list.
	   0xE9 is the jmp rel32 opcode; its 4 displacement bytes hold the
	   chain offset back to the previous epilog fixup */
	*inst++ = (unsigned char)0xE9;
	if(gen->epilog_fixup)
	{
		fixup = _JIT_CALC_FIXUP(gen->epilog_fixup, inst);
	}
	else
	{
		/* 0 terminates the fixup chain */
		fixup = 0;
	}
	gen->epilog_fixup = (void *)inst;
	x86_imm_emit32(inst, fixup);
	return inst;
}
  1334  
  1335  /*
  1336   * fixup a register being alloca'd to by accounting for the param area
  1337   */
static unsigned char *
fixup_alloca(jit_gencode_t gen, unsigned char *inst, int reg)
{
#ifdef JIT_USE_PARAM_AREA
	jit_int fixup;
	jit_int temp;

	/*
	 * emit the instruction and then replace the imm section of op with
	 * the fixup.
	 * NOTE: We are using the temp variable here to avoid a compiler
	 * warning and the temp value to make sure that an instruction with
	 * a 32 bit immediate is emitted. The temp value in the instruction
	 * will be replaced by the fixup
	 */
	temp = 1234567;
	x86_64_add_reg_imm_size(inst, reg, temp, 8);

	/* Step back so that inst points at the 32-bit immediate just emitted */
	inst -= 4;

	/* Calculate the fixup (0 terminates the chain) */
	if (gen->alloca_fixup)
	{
		fixup = _JIT_CALC_FIXUP(gen->alloca_fixup, inst);
	}
	else
	{
		fixup = 0;
	}
	gen->alloca_fixup = (void *)inst;
	x86_imm_emit32(inst, fixup);
#else /* !JIT_USE_PARAM_AREA */
	/* alloca fixup is not needed if the param area is not used */
#endif /* JIT_USE_PARAM_AREA */
	return inst;
}
  1375  
  1376  /*
  1377   * Compare a xmm register with an immediate value.
  1378   */
static unsigned char *
xmm_cmp_reg_imm(jit_gencode_t gen, unsigned char *inst, int xreg, void *imm,
		int is_double)
{
	/* Length of the RIP-relative ucomiss/ucomisd about to be emitted:
	   7 bytes base, +1 for the 0x66 prefix in the double case, +1 for
	   the REX prefix when xreg is xmm8-xmm15.  Needed to compute the
	   RIP-relative displacement below.
	   NOTE(review): derived from the emitter macros — confirm against
	   their encodings. */
	int inst_len = 7 + (is_double ? 1 : 0) + (xreg > 7 ? 1 : 0);
	void *ptr;
	jit_nint offset;

	/* Copy the immediate into the constant pool; return 0 on
	   allocation failure */
	if(is_double)
	{
		ptr = _jit_gen_alloc(gen, sizeof(jit_float64));
		if(!ptr)
		{
			return 0;
		}
		jit_memcpy(ptr, imm, sizeof(jit_float64));
	}
	else
	{
		ptr = _jit_gen_alloc(gen, sizeof(jit_float32));
		if(!ptr)
		{
			return 0;
		}
		jit_memcpy(ptr, imm, sizeof(jit_float32));
	}
	/* Displacement from the end of the compare instruction to the
	   pooled constant */
	offset = (jit_nint)ptr - ((jit_nint)inst + inst_len);
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		if(is_double)
		{
			x86_64_ucomisd_reg_membase(inst, xreg, X86_64_RIP, offset);
		}
		else
		{
			x86_64_ucomiss_reg_membase(inst, xreg, X86_64_RIP, offset);
		}
	}
	else if(((jit_nint)ptr >= jit_min_int) &&
		((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		if(is_double)
		{
			x86_64_ucomisd_reg_mem(inst, xreg, (jit_nint)ptr);
		}
		else
		{
			x86_64_ucomiss_reg_mem(inst, xreg, (jit_nint)ptr);
		}
	}
	else
	{
		/* We have to use an extra general register (not implemented) */
		TODO();
		return 0;
	}
	return inst;
}
  1439  
  1440  /*
  1441   * Compare two scalar float or double values and set dreg depending on the
  1442   * flags set.
  1443   * The result for nan values depends on nan_result.
  1444   * If nan_result is == 0 then the result is 0 if any nan value is involved,
  1445   * otherwise the result is true.
  1446   */
static unsigned char *
xmm_setcc(unsigned char *inst, int dreg, int cond, int sreg, int nan_result)
{
	x86_64_set_reg(inst, cond, dreg, 0);
	if(nan_result)
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 0 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * eq, lt and le.
		 */
		if((cond != 0) && (cond != 2) && (cond != 3))
		{
			/* Set sreg on PF and fold it in so NaN yields true */
			x86_64_set_reg(inst, 8 /* p */ , sreg, 0);
			x86_64_or_reg_reg_size(inst, dreg, sreg, 4);
		}
	}
	else
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 1 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * ne, gt and ge.
		 */
		if((cond != 1) && (cond != 4) && (cond != 5))
		{
			/* Set sreg on !PF and fold it in so NaN yields false */
			x86_64_set_reg(inst, 9 /* np */ , sreg, 0);
			x86_64_and_reg_reg_size(inst, dreg, sreg, 4);
		}
	}
	/* Zero-extend the byte result to the full 32-bit register */
	x86_64_movzx8_reg_reg_size(inst, dreg, dreg, 4);
	return inst;
}
  1482  
  1483  static unsigned char *
  1484  xmm_cmp_setcc_reg_imm(jit_gencode_t gen, unsigned char *inst, int dreg,
  1485  		      int cond, int xreg, void *imm, int sreg, int is_double,
  1486  		      int nan_result)
  1487  {
  1488  	inst = xmm_cmp_reg_imm(gen, inst, xreg, imm, is_double);
  1489  	return xmm_setcc(inst, dreg, cond, sreg, nan_result);
  1490  }
  1491  
  1492  static unsigned char *
  1493  xmm_cmp_setcc_reg_reg(unsigned char *inst, int dreg, int cond, int xreg1,
  1494  		      int xreg2, int sreg, int is_double, int nan_result)
  1495  {
  1496  	if(is_double)
  1497  	{
  1498  		x86_64_ucomisd_reg_reg(inst, xreg1, xreg2);
  1499  	}
  1500  	else
  1501  	{
  1502  		x86_64_ucomiss_reg_reg(inst, xreg1, xreg2);
  1503  	}
  1504  	return xmm_setcc(inst, dreg, cond, sreg, nan_result);
  1505  }
  1506  
  1507  /*
  1508   * Compare two float values and branch depending on the flags.
  1509   */
static unsigned char *
xmm_brcc(jit_function_t func, unsigned char *inst, int cond, int nan_result,
	 jit_insn_t insn)
{
	if(nan_result)
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 0 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * eq, lt and le.
		 */
		if((cond != 0) && (cond != 2) && (cond != 3))
		{
			/* Branch if the parity flag is set (NaN => take branch) */
			inst = output_branch(func, inst,
					     x86_cc_unsigned_map[8], insn);
		}
		inst = output_branch(func, inst, x86_cc_unsigned_map[cond], insn);
	}
	else
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 1 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * ne, gt and ge.
		 */
		if((cond != 1) && (cond != 4) && (cond != 5))
		{
			/* Skip the conditional branch when PF is set, so a NaN
			   result never takes it; the skip target is patched in
			   once the branch has been emitted */
			unsigned char *patch;
			patch = inst;
			x86_branch8(inst, X86_CC_P, 0, 0);
			inst = output_branch(func, inst,
					     x86_cc_unsigned_map[cond], insn);
			x86_patch(patch, inst);
		}
		else
		{
			inst = output_branch(func, inst,
					     x86_cc_unsigned_map[cond], insn);
		}
	}
	return inst;
}
  1555  
  1556  static unsigned char *
  1557  xmm_cmp_brcc_reg_imm(jit_gencode_t gen, jit_function_t func,
  1558  		     unsigned char *inst, int cond, int xreg, void *imm,
  1559  		     int is_double, int nan_result, jit_insn_t insn)
  1560  {
  1561  	inst = xmm_cmp_reg_imm(gen, inst, xreg, imm, is_double);
  1562  	return xmm_brcc(func, inst, cond, nan_result, insn);
  1563  }
  1564  
  1565  static unsigned char *
  1566  xmm_cmp_brcc_reg_reg(jit_function_t func, unsigned char *inst, int cond,
  1567  		     int xreg1, int xreg2, int is_double, int nan_result,
  1568  		     jit_insn_t insn)
  1569  {
  1570  	if(is_double)
  1571  	{
  1572  		x86_64_ucomisd_reg_reg(inst, xreg1, xreg2);
  1573  	}
  1574  	else
  1575  	{
  1576  		x86_64_ucomiss_reg_reg(inst, xreg1, xreg2);
  1577  	}
  1578  	return xmm_brcc(func, inst, cond, nan_result, insn);
  1579  }
  1580  
  1581  static unsigned char *
  1582  xmm_cmp_brcc_reg_membase(jit_function_t func, unsigned char *inst, int cond,
  1583  			 int xreg1, int basereg, int offset, int is_double,
  1584  			 int nan_result, jit_insn_t insn)
  1585  {
  1586  	if(is_double)
  1587  	{
  1588  		x86_64_ucomisd_reg_membase(inst, xreg1, basereg, offset);
  1589  	}
  1590  	else
  1591  	{
  1592  		x86_64_ucomiss_reg_membase(inst, xreg1, basereg, offset);
  1593  	}
  1594  	return xmm_brcc(func, inst, cond, nan_result, insn);
  1595  }
  1596  
  1597  /*
  1598   * Support functiond for the FPU stack
  1599   */
  1600  
static int
fp_stack_index(jit_gencode_t gen, int reg)
{
	/* Convert an allocator FPU register number into an x87 stack index
	   relative to st(0); reg_stack_top tracks the current stack depth */
	return gen->reg_stack_top - reg - 1;
}
  1606  
void
_jit_gen_exch_top(jit_gencode_t gen, int reg)
{
	/* Exchange st(0) with the x87 stack slot that holds "reg";
	   a no-op for anything that is not an FPU register */
	if(IS_FPU_REG(reg))
	{
		jit_cache_setup_output(2);
		x86_fxch(inst, fp_stack_index(gen, reg));
		jit_cache_end_output();
	}
}
  1617  
void
 _jit_gen_move_top(jit_gencode_t gen, int reg)
{
	/* Pop st(0) into the x87 stack slot that holds "reg" (fstp);
	   a no-op for anything that is not an FPU register */
	if(IS_FPU_REG(reg))
	{
		jit_cache_setup_output(2);
		x86_fstp(inst, fp_stack_index(gen, reg));
		jit_cache_end_output();
	}
}
  1628  
void
_jit_gen_spill_top(jit_gencode_t gen, int reg, jit_value_t value, int pop)
{
	/* Spill the value on top of the x87 stack to "value"'s frame slot,
	   optionally popping it; only meaningful for FPU registers */
	if(IS_FPU_REG(reg))
	{
		int offset;

		/* Make sure that we have sufficient space */
		jit_cache_setup_output(16);

		/* Fix the value in place within the local variable frame */
		_jit_gen_fix_value(value);

		/* Output an appropriate instruction to spill the value */
		offset = (int)(value->frame_offset);

		/* Spill the top of the floating-point register stack */
		switch(jit_type_normalize(value->type)->kind)
		{
			case JIT_TYPE_FLOAT32:
			{
				/* fstp pops, fst leaves st(0) in place */
				if(pop)
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 4);
				}
				else
				{
					x86_64_fst_membase_size(inst, X86_64_RBP, offset, 4);
				}
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				if(pop)
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
				}
				else
				{
					x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8);
				}
			}
			break;

			case JIT_TYPE_NFLOAT:
			{
				if(sizeof(jit_nfloat) == sizeof(jit_float64))
				{
					if(pop)
					{
						x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
					}
					else
					{
						x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8);
					}
				}
				else
				{
					/* x87 has no non-popping 80-bit store, so store
					   with pop and reload when the value must remain
					   on the stack */
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 10);
					if(!pop)
					{
						x86_64_fld_membase_size(inst, X86_64_RBP, offset, 10);
					}
				}
			}
			break;
		}

		/* End the code output process */
		jit_cache_end_output();
	}
}
  1703  
  1704  void
  1705  _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value)
  1706  {
  1707  	jit_type_t type;
  1708  	int src_reg;
  1709  	void *ptr;
  1710  	int offset;
  1711  
  1712  	/* Make sure that we have sufficient space */
  1713  	jit_cache_setup_output(16);
  1714  
  1715  	type = jit_type_normalize(value->type);
  1716  
  1717  	/* Load zero */
  1718  	if(value->is_constant)
  1719  	{
  1720  		switch(type->kind)
  1721  		{
  1722  			case JIT_TYPE_SBYTE:
  1723  			case JIT_TYPE_UBYTE:
  1724  			case JIT_TYPE_SHORT:
  1725  			case JIT_TYPE_USHORT:
  1726  			case JIT_TYPE_INT:
  1727  			case JIT_TYPE_UINT:
  1728  			{
  1729  				if((jit_nint)(value->address) == 0)
  1730  				{
  1731  					x86_64_clear_reg(inst, _jit_reg_info[reg].cpu_reg);
  1732  				}
  1733  				else
  1734  				{
  1735  					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
  1736  							(jit_nint)(value->address), 4);
  1737  				}
  1738  			}
  1739  			break;
  1740  
  1741  			case JIT_TYPE_LONG:
  1742  			case JIT_TYPE_ULONG:
  1743  			{
  1744  				if((jit_nint)(value->address) == 0)
  1745  				{
  1746  					x86_64_clear_reg(inst, _jit_reg_info[reg].cpu_reg);
  1747  				}
  1748  				else
  1749  				{
  1750  					if((jit_nint)(value->address) > 0 && (jit_nint)(value->address) <= (jit_nint)jit_max_uint)
  1751  					{
  1752  						x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
  1753  								(jit_nint)(value->address), 4);
  1754  
  1755  					}
  1756  					else
  1757  					{
  1758  						x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
  1759  								(jit_nint)(value->address), 8);
  1760  					}
  1761  				}
  1762  			}
  1763  			break;
  1764  
  1765  			case JIT_TYPE_FLOAT32:
  1766  			{
  1767  				jit_float32 float32_value;
  1768  
  1769  				float32_value = jit_value_get_float32_constant(value);
  1770  
  1771  				if(IS_GENERAL_REG(reg))
  1772  				{
  1773  					union
  1774  					{
  1775  						jit_float32 float32_value;
  1776  						jit_int int_value;
  1777  					} un;
  1778  
  1779  					un.float32_value = float32_value;
  1780  					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
  1781  											un.int_value, 4);
  1782  				}
  1783  				else if(IS_XMM_REG(reg))
  1784  				{
  1785  					int xmm_reg = _jit_reg_info[reg].cpu_reg;
  1786  
  1787  					if(float32_value == (jit_float32) 0.0)
  1788  					{
  1789  						x86_64_clear_xreg(inst, xmm_reg);
  1790  					}
  1791  					else
  1792  					{
  1793  						_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MOV,
  1794  													   xmm_reg, &float32_value);
  1795  					}
  1796  				}
  1797  				else
  1798  				{
  1799  					if(float32_value == (jit_float32) 0.0)
  1800  					{
  1801  						x86_fldz(inst);
  1802  					}
  1803  					else if(float32_value == (jit_float32) 1.0)
  1804  					{
  1805  						x86_fld1(inst);
  1806  					}
  1807  					else
  1808  					{
  1809  						jit_nint offset;
  1810  
  1811  						ptr = _jit_gen_alloc(gen, sizeof(jit_float32));
  1812  						jit_memcpy(ptr, &float32_value, sizeof(float32_value));
  1813  
  1814  						offset = (jit_nint)ptr - ((jit_nint)inst + 6);
  1815  						if((offset >= jit_min_int) && (offset <= jit_max_int))
  1816  						{
  1817  							/* We can use RIP relative addressing here */
  1818  							x86_64_fld_membase_size(inst, X86_64_RIP, offset, 4);
  1819  						}
  1820  						else if(((jit_nint)ptr >= jit_min_int) &&
  1821  								((jit_nint)ptr <= jit_max_int))
  1822  						{
  1823  							/* We can use absolute addressing */
  1824  							x86_64_fld_mem_size(inst, (jit_nint)ptr, 4);
  1825  						}
  1826  						else
  1827  						{
  1828  							/* We have to use an extra general register */
  1829  							TODO();
  1830  						}
  1831  					}
  1832  				}
  1833  			}
  1834  			break;
  1835  
  1836  			case JIT_TYPE_FLOAT64:
  1837  			{
  1838  				jit_float64 float64_value;
  1839  				float64_value = jit_value_get_float64_constant(value);
  1840  				if(IS_GENERAL_REG(reg))
  1841  				{
  1842  					union
  1843  					{
  1844  						jit_float64 float64_value;
  1845  						jit_long long_value;
  1846  					} un;
  1847  
  1848  					un.float64_value = float64_value;
  1849  					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
  1850  											un.long_value, 8);
  1851  				}
  1852  				else if(IS_XMM_REG(reg))
  1853  				{
  1854  					int xmm_reg = _jit_reg_info[reg].cpu_reg;
  1855  
  1856  					if(float64_value == (jit_float64) 0.0)
  1857  					{
  1858  						x86_64_clear_xreg(inst, xmm_reg);
  1859  					}
  1860  					else
  1861  					{
  1862  						_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MOV,
  1863  													   xmm_reg, &float64_value);
  1864  					}
  1865  				}
  1866  				else
  1867  				{
  1868  					if(float64_value == (jit_float64) 0.0)
  1869  					{
  1870  						x86_fldz(inst);
  1871  					}
  1872  					else if(float64_value == (jit_float64) 1.0)
  1873  					{
  1874  						x86_fld1(inst);
  1875  					}
  1876  					else
  1877  					{
  1878  						jit_nint offset;
  1879  
  1880  						ptr = _jit_gen_alloc(gen, sizeof(jit_float64));
  1881  						jit_memcpy(ptr, &float64_value, sizeof(float64_value));
  1882  
  1883  						offset = (jit_nint)ptr - ((jit_nint)inst + 6);
  1884  						if((offset >= jit_min_int) && (offset <= jit_max_int))
  1885  						{
  1886  							/* We can use RIP relative addressing here */
  1887  							x86_64_fld_membase_size(inst, X86_64_RIP, offset, 8);
  1888  						}
  1889  						else if(((jit_nint)ptr >= jit_min_int) &&
  1890  								((jit_nint)ptr <= jit_max_int))
  1891  						{
  1892  							/* We can use absolute addressing */
  1893  							x86_64_fld_mem_size(inst, (jit_nint)ptr, 8);
  1894  						}
  1895  						else
  1896  						{
  1897  							/* We have to use an extra general register */
  1898  							TODO();
  1899  						}
  1900  					}
  1901  				}
  1902  			}
  1903  			break;
  1904  
  1905  			case JIT_TYPE_NFLOAT:
  1906  			{
  1907  				jit_nfloat nfloat_value;
  1908  				nfloat_value = jit_value_get_nfloat_constant(value);
  1909  				if(IS_GENERAL_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64))
  1910  				{
  1911  					union
  1912  					{
  1913  						jit_nfloat nfloat_value;
  1914  						jit_long long_value;
  1915  					} un;
  1916  
  1917  					un.nfloat_value = nfloat_value;
  1918  					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
  1919  											un.long_value, 8);
  1920  				}
  1921  				else if(IS_XMM_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64))
  1922  				{
  1923  					jit_nint offset;
  1924  					int xmm_reg = _jit_reg_info[reg].cpu_reg;
  1925  
  1926  					ptr = _jit_gen_alloc(gen, sizeof(jit_nfloat));
  1927  					jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value));
  1928  					offset = (jit_nint)ptr -
  1929  								((jit_nint)inst + (xmm_reg > 7 ? 9 : 8));
  1930  					if((offset >= jit_min_int) && (offset <= jit_max_int))
  1931  					{
  1932  						/* We can use RIP relative addressing here */
  1933  						x86_64_movsd_reg_membase(inst, xmm_reg, X86_64_RIP, offset);
  1934  					}
  1935  					else if(((jit_nint)ptr >= jit_min_int) &&
  1936  							((jit_nint)ptr <= jit_max_int))
  1937  					{
  1938  						/* We can use absolute addressing */
  1939  						x86_64_movsd_reg_mem(inst, xmm_reg, (jit_nint)ptr);
  1940  					}
  1941  					else
  1942  					{
  1943  						/* We have to use an extra general register */
  1944  						TODO();
  1945  					}
  1946  				}
  1947  				else
  1948  				{
  1949  					if(nfloat_value == (jit_nfloat) 0.0)
  1950  					{
  1951  						x86_fldz(inst);
  1952  					}
  1953  					else if(nfloat_value == (jit_nfloat) 1.0)
  1954  					{
  1955  						x86_fld1(inst);
  1956  					}
  1957  					else
  1958  					{
  1959  						jit_nint offset;
  1960  
  1961  						ptr = _jit_gen_alloc(gen, sizeof(jit_nfloat));
  1962  						jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value));
  1963  
  1964  						offset = (jit_nint)ptr - ((jit_nint)inst + 6);
  1965  						if((offset >= jit_min_int) && (offset <= jit_max_int))
  1966  						{
  1967  							/* We can use RIP relative addressing here */
  1968  							if(sizeof(jit_nfloat) == sizeof(jit_float64))
  1969  							{
  1970  								x86_64_fld_membase_size(inst, X86_64_RIP, offset, 8);
  1971  							}
  1972  							else
  1973  							{
  1974  								x86_64_fld_membase_size(inst, X86_64_RIP, offset, 10);
  1975  							}
  1976  						}
  1977  						else if(((jit_nint)ptr >= jit_min_int) &&
  1978  								((jit_nint)ptr <= jit_max_int))
  1979  						{
  1980  							/* We can use absolute addressing */
  1981  							if(sizeof(jit_nfloat) == sizeof(jit_float64))
  1982  							{
  1983  								x86_64_fld_mem_size(inst, (jit_nint)ptr, 8);
  1984  							}
  1985  							else
  1986  							{
  1987  								x86_64_fld_mem_size(inst, (jit_nint)ptr, 10);
  1988  							}
  1989  						}
  1990  						else
  1991  						{
  1992  							/* We have to use an extra general register */
  1993  							TODO();
  1994  						}
  1995  					}
  1996  				}
  1997  			}
  1998  			break;
  1999  		}
  2000  	}
  2001  	else if(value->in_register || value->in_global_register)
  2002  	{
  2003  		if(value->in_register)
  2004  		{
  2005  			src_reg = value->reg;
  2006  		}
  2007  		else
  2008  		{
  2009  			src_reg = value->global_reg;
  2010  		}
  2011  
  2012  		switch(type->kind)
  2013  		{
  2014  #if 0
  2015  			case JIT_TYPE_SBYTE:
  2016  			{
  2017  				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
  2018  					      _jit_reg_info[src_reg].cpu_reg, 1, 0);
  2019  			}
  2020  			break;
  2021  
  2022  			case JIT_TYPE_UBYTE:
  2023  			{
  2024  				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
  2025  					      _jit_reg_info[src_reg].cpu_reg, 0, 0);
  2026  			}
  2027  			break;
  2028  
  2029  			case JIT_TYPE_SHORT:
  2030  			{
  2031  				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
  2032  					      _jit_reg_info[src_reg].cpu_reg, 1, 1);
  2033  			}
  2034  			break;
  2035  
  2036  			case JIT_TYPE_USHORT:
  2037  			{
  2038  				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
  2039  					      _jit_reg_info[src_reg].cpu_reg, 0, 1);
  2040  			}
  2041  			break;
  2042  #else
  2043  			case JIT_TYPE_SBYTE:
  2044  			case JIT_TYPE_UBYTE:
  2045  			case JIT_TYPE_SHORT:
  2046  			case JIT_TYPE_USHORT:
  2047  #endif
  2048  			case JIT_TYPE_INT:
  2049  			case JIT_TYPE_UINT:
  2050  			{
  2051  				x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg,
  2052  										_jit_reg_info[src_reg].cpu_reg, 4);
  2053  			}
  2054  			break;
  2055  
  2056  			case JIT_TYPE_LONG:
  2057  			case JIT_TYPE_ULONG:
  2058  			{
  2059  				x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg,
  2060  										_jit_reg_info[src_reg].cpu_reg, 8);
  2061  			}
  2062  			break;
  2063  
  2064  			case JIT_TYPE_FLOAT32:
  2065  			{
  2066  				if(IS_FPU_REG(reg))
  2067  				{
  2068  					if(IS_FPU_REG(src_reg))
  2069  					{
  2070  						x86_fld_reg(inst, fp_stack_index(gen, src_reg));
  2071  					}
  2072  					else if(IS_XMM_REG(src_reg))
  2073  					{
  2074  						/* Fix the position of the value in the stack frame */
  2075  						_jit_gen_fix_value(value);
  2076  						offset = (int)(value->frame_offset);
  2077  
  2078  						x86_64_movss_membase_reg(inst, X86_64_RBP, offset,
  2079  												 _jit_reg_info[src_reg].cpu_reg);
  2080  						x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
  2081  					}
  2082  				}
  2083  				else if(IS_XMM_REG(reg))
  2084  				{
  2085  					if(IS_FPU_REG(src_reg))
  2086  					{
  2087  						/* Fix the position of the value in the stack frame */
  2088  						_jit_gen_fix_value(value);
  2089  						offset = (int)(value->frame_offset);
  2090  
  2091  						x86_64_fst_membase_size(inst, X86_64_RBP, offset, 4);
  2092  						x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2093  												 X86_64_RBP, offset);
  2094  					}
  2095  					else if(IS_XMM_REG(src_reg))
  2096  					{
  2097  						x86_64_movss_reg_reg(inst, _jit_reg_info[reg].cpu_reg,
  2098  											 _jit_reg_info[src_reg].cpu_reg);
  2099  					}
  2100  				}
  2101  			}
  2102  			break;
  2103  
  2104  			case JIT_TYPE_FLOAT64:
  2105  			{
  2106  				if(IS_FPU_REG(reg))
  2107  				{
  2108  					if(IS_FPU_REG(src_reg))
  2109  					{
  2110  						x86_fld_reg(inst, fp_stack_index(gen, src_reg));
  2111  					}
  2112  					else if(IS_XMM_REG(src_reg))
  2113  					{
  2114  						/* Fix the position of the value in the stack frame */
  2115  						_jit_gen_fix_value(value);
  2116  						offset = (int)(value->frame_offset);
  2117  
  2118  						x86_64_movsd_membase_reg(inst, X86_64_RBP, offset,
  2119  												 _jit_reg_info[src_reg].cpu_reg);
  2120  						x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
  2121  					}
  2122  				}
  2123  				else if(IS_XMM_REG(reg))
  2124  				{
  2125  					if(IS_FPU_REG(src_reg))
  2126  					{
  2127  						/* Fix the position of the value in the stack frame */
  2128  						_jit_gen_fix_value(value);
  2129  						offset = (int)(value->frame_offset);
  2130  
  2131  						x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8);
  2132  						x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2133  												 X86_64_RBP, offset);
  2134  					}
  2135  					else if(IS_XMM_REG(src_reg))
  2136  					{
  2137  						x86_64_movsd_reg_reg(inst, _jit_reg_info[reg].cpu_reg,
  2138  											 _jit_reg_info[src_reg].cpu_reg);
  2139  					}
  2140  				}
  2141  			}
  2142  			break;
  2143  
  2144  			case JIT_TYPE_NFLOAT:
  2145  			{
  2146  				if(IS_FPU_REG(reg))
  2147  				{
  2148  					if(IS_FPU_REG(src_reg))
  2149  					{
  2150  						x86_fld_reg(inst, fp_stack_index(gen, src_reg));
  2151  					}
  2152  					else
  2153  					{
  2154  						fputs("Unsupported native float reg - reg move\n", stderr);
  2155  					}
  2156  				}
  2157  			}
  2158  			break;
  2159  
  2160  			case JIT_TYPE_STRUCT:
  2161  			case JIT_TYPE_UNION:
  2162  			{
  2163  				if(IS_GENERAL_REG(reg))
  2164  				{
  2165  					if(IS_GENERAL_REG(src_reg))
  2166  					{
  2167  						x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg,
  2168  												_jit_reg_info[src_reg].cpu_reg, 8);
  2169  					}
  2170  					else if(IS_XMM_REG(src_reg))
  2171  					{
  2172  						x86_64_movq_reg_xreg(inst, _jit_reg_info[reg].cpu_reg,
  2173  											 _jit_reg_info[src_reg].cpu_reg);
  2174  					}
  2175  					else
  2176  					{
  2177  						fputs("Unsupported struct/union reg - reg move\n", stderr);
  2178  					}
  2179  				}
  2180  				else if(IS_XMM_REG(reg))
  2181  				{
  2182  					if(IS_GENERAL_REG(src_reg))
  2183  					{
  2184  						x86_64_movq_xreg_reg(inst, _jit_reg_info[reg].cpu_reg,
  2185  											 _jit_reg_info[src_reg].cpu_reg);
  2186  					}
  2187  					else if(IS_XMM_REG(src_reg))
  2188  					{
  2189  						x86_64_movaps_reg_reg(inst, _jit_reg_info[reg].cpu_reg,
  2190  											  _jit_reg_info[src_reg].cpu_reg);
  2191  					}
  2192  					else
  2193  					{
  2194  						fputs("Unsupported struct/union reg - reg move\n", stderr);
  2195  					}
  2196  				}
  2197  				else
  2198  				{
  2199  					fputs("Unsupported struct/union reg - reg move\n", stderr);
  2200  				}
  2201  			}
  2202  		}
  2203  	}
  2204  	else
  2205  	{
  2206  		/* Fix the position of the value in the stack frame */
  2207  		_jit_gen_fix_value(value);
  2208  		offset = (int)(value->frame_offset);
  2209  
  2210  		/* Load the value into the specified register */
  2211  		switch(type->kind)
  2212  		{
  2213  			case JIT_TYPE_SBYTE:
  2214  			{
  2215  				x86_64_movsx8_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2216  											   X86_64_RBP, offset, 4);
  2217  			}
  2218  			break;
  2219  
  2220  			case JIT_TYPE_UBYTE:
  2221  			{
  2222  				x86_64_movzx8_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2223  											   X86_64_RBP, offset, 4);
  2224  			}
  2225  			break;
  2226  
  2227  			case JIT_TYPE_SHORT:
  2228  			{
  2229  				x86_64_movsx16_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2230  												X86_64_RBP, offset, 4);
  2231  			}
  2232  			break;
  2233  
  2234  			case JIT_TYPE_USHORT:
  2235  			{
  2236  				x86_64_movzx16_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2237  												X86_64_RBP, offset, 4);
  2238  			}
  2239  			break;
  2240  
  2241  			case JIT_TYPE_INT:
  2242  			case JIT_TYPE_UINT:
  2243  			{
  2244  				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2245  											X86_64_RBP, offset, 4);
  2246  			}
  2247  			break;
  2248  
  2249  			case JIT_TYPE_LONG:
  2250  			case JIT_TYPE_ULONG:
  2251  			{
  2252  				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2253  											X86_64_RBP, offset, 8);
  2254  			}
  2255  			break;
  2256  
  2257  			case JIT_TYPE_FLOAT32:
  2258  			{
  2259  				if(IS_GENERAL_REG(reg))
  2260  				{
  2261  					x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2262  												X86_64_RBP, offset, 4);
  2263  				}
  2264  				if(IS_XMM_REG(reg))
  2265  				{
  2266  					x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2267  											 X86_64_RBP, offset);
  2268  				}
  2269  				else
  2270  				{
  2271  					x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
  2272  				}
  2273  			}
  2274  			break;
  2275  
  2276  			case JIT_TYPE_FLOAT64:
  2277  			{
  2278  				if(IS_GENERAL_REG(reg))
  2279  				{
  2280  					x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2281  												X86_64_RBP, offset, 8);
  2282  				}
  2283  				else if(IS_XMM_REG(reg))
  2284  				{
  2285  					x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2286  											 X86_64_RBP, offset);
  2287  				}
  2288  				else
  2289  				{
  2290  					x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
  2291  				}
  2292  			}
  2293  			break;
  2294  
  2295  			case JIT_TYPE_NFLOAT:
  2296  			{
  2297  				if(sizeof(jit_nfloat) == sizeof(jit_float64))
  2298  				{
  2299  					if(IS_GENERAL_REG(reg))
  2300  					{
  2301  						x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2302  													X86_64_RBP, offset, 8);
  2303  					}
  2304  					else if(IS_XMM_REG(reg))
  2305  					{
  2306  						x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2307  												 X86_64_RBP, offset);
  2308  					}
  2309  					else
  2310  					{
  2311  						x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
  2312  					}
  2313  				}
  2314  				else
  2315  				{
  2316  					x86_64_fld_membase_size(inst, X86_64_RBP, offset, 10);
  2317  				}
  2318  			}
  2319  			break;
  2320  
  2321  			case JIT_TYPE_STRUCT:
  2322  			case JIT_TYPE_UNION:
  2323  			{
  2324  				jit_nuint size = jit_type_get_size(type);
  2325  
  2326  				if(IS_GENERAL_REG(reg))
  2327  				{
  2328  					if(size == 1)
  2329  					{
  2330  						x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2331  													X86_64_RBP, offset, 1);
  2332  					}
  2333  					else if(size == 2)
  2334  					{
  2335  						x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2336  													X86_64_RBP, offset, 2);
  2337  					}
  2338  					else if(size <= 4)
  2339  					{
  2340  						x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2341  													X86_64_RBP, offset, 4);
  2342  					}
  2343  					else if(size <= 8)
  2344  					{
  2345  						x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
  2346  													X86_64_RBP, offset, 8);
  2347  					}
  2348  				}
  2349  				else if(IS_XMM_REG(reg))
  2350  				{
  2351  					if(size <= 4)
  2352  					{
  2353  						x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2354  												 X86_64_RBP, offset);
  2355  					}
  2356  					else if(size <= 8)
  2357  					{
  2358  						x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2359  												 X86_64_RBP, offset);
  2360  					}
  2361  					else
  2362  					{
  2363  						int alignment = jit_type_get_alignment(type);
  2364  
  2365  						if((alignment & 0xf) == 0)
  2366  						{
  2367  							x86_64_movaps_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2368  													  X86_64_RBP, offset);
  2369  						}
  2370  						else
  2371  						{
  2372  							x86_64_movups_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
  2373  													  X86_64_RBP, offset);
  2374  						}
  2375  					}
  2376  				}
  2377  			}
  2378  		}
  2379  	}
  2380  
  2381  	/* End the code output process */
  2382  	jit_cache_end_output();
  2383  }
  2384  
  2385  void
  2386  _jit_gen_get_elf_info(jit_elf_info_t *info)
  2387  {
  2388  	info->machine = 62;	/* EM_X86_64 */
  2389  	info->abi = 0;		/* ELFOSABI_SYSV */
  2390  	info->abi_version = 0;
  2391  }
  2392  
/*
 * Generate the function prolog into a temporary buffer and copy it into
 * the tail end of the JIT_PROLOG_SIZE area at "buf", so that the actual
 * entry point directly precedes the first body instruction.  Returns the
 * adjusted entry point.
 */
void *
_jit_gen_prolog(jit_gencode_t gen, jit_function_t func, void *buf)
{
	unsigned char prolog[JIT_PROLOG_SIZE];
	unsigned char *inst = prolog;
	int reg;
	int frame_size = 0;
	int regs_to_save = 0;

	/* Push rbp onto the stack */
	x86_64_push_reg_size(inst, X86_64_RBP, 8);

	/* Initialize rbp for the current frame */
	x86_64_mov_reg_reg_size(inst, X86_64_RBP, X86_64_RSP, 8);

	/* Allocate space for the local variable frame */
	if(func->builder->frame_size > 0)
	{
		/* Make sure that the frame size is a multiple of 8 bytes */
		frame_size = (func->builder->frame_size + 0x7) & ~0x7;
	}

	/* Get the number of registers we need to preserve */
	/* NOTE(review): this loop scans regs 0..13 while the save loop below
	   scans 0..14 — confirm that pseudo register 14 can never be both
	   touched and callee-saved, otherwise the save area computed here is
	   one slot too small. */
	for(reg = 0; reg < 14; ++reg)
	{
		if(jit_reg_is_used(gen->touched, reg) &&
		   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
		{
			++regs_to_save;
		}
	}

	/* Add the register save area (8 bytes per register) to the frame size */
	frame_size += (regs_to_save << 3);

#ifdef JIT_USE_PARAM_AREA
	/* Add the param area to the frame_size if the additional offset
	   doesn't push the register-save offsets beyond a 1-byte displacement */
	if(func->builder->param_area_size > 0 &&
	   (func->builder->param_area_size <= 0x50 || regs_to_save == 0))
	{
		frame_size += func->builder->param_area_size;
	}
#endif /* JIT_USE_PARAM_AREA */

	/* Make sure that the frame size is a multiple of 16 bytes */
	/* so that the final RSP will be aligned on a 16-byte boundary. */
	frame_size = (frame_size + 0xf) & ~0xf;

	if(frame_size > 0)
	{
		x86_64_sub_reg_imm_size(inst, X86_64_RSP, frame_size, 8);
	}

	if(regs_to_save > 0)
	{
		int current_offset;
#ifdef JIT_USE_PARAM_AREA
		/* Place the save area above the param area when the latter fits
		   below it */
		if(func->builder->param_area_size > 0 &&
		   func->builder->param_area_size <= 0x50)
		{
			current_offset = func->builder->param_area_size;
		}
		else
#endif /* JIT_USE_PARAM_AREA */
		{
			current_offset = 0;
		}

		/* Save registers that we need to preserve */
		for(reg = 0; reg <= 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RSP, current_offset,
											_jit_reg_info[reg].cpu_reg, 8);
				current_offset += 8;
			}
		}
	}
#ifdef JIT_USE_PARAM_AREA
	/* Large param areas are allocated after the register saves instead */
	if(func->builder->param_area_size > 0x50 && regs_to_save > 0)
	{
		x86_64_sub_reg_imm_size(inst, X86_64_RSP, func->builder->param_area_size, 8);
	}
#endif /* JIT_USE_PARAM_AREA */

	/* Copy the prolog into place and return the adjusted entry position.
	   "reg" is reused here to hold the prolog's length in bytes. */
	reg = (int)(inst - prolog);
	jit_memcpy(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg, prolog, reg);
	return (void *)(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg);
}
  2486  
/*
 * Generate the function epilog: resolve pending epilog and alloca fixups,
 * restore the callee-saved registers, tear down the frame and return.
 */
void
_jit_gen_epilog(jit_gencode_t gen, jit_function_t func)
{
	unsigned char *inst;
	int reg;
	int current_offset;
	jit_int *fixup;
	jit_int *next;

	/* Bail out if there is insufficient space for the epilog */
	_jit_gen_check_space(gen, 48);

	inst = gen->ptr;

	/* Perform fixups on any blocks that jump to the epilog.  The fixup
	   list is threaded through the emitted code: each 32-bit slot holds
	   the offset to the next fixup and is overwritten with the branch
	   displacement to the epilog. */
	fixup = (jit_int *)(gen->epilog_fixup);
	while(fixup != 0)
	{
		if(DEBUG_FIXUPS)
		{
			fprintf(stderr, "Fixup Address: %lx, Value: %x\n",
					(jit_nint)fixup, fixup[0]);
		}
		next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = (jit_int)(((jit_nint)inst) - ((jit_nint)fixup) - 4);
		fixup = next;
	}
	gen->epilog_fixup = 0;

	/* Perform fixups on any alloca calls: patch in the final param
	   area size, which is only known now */
	fixup = (jit_int *)(gen->alloca_fixup);
	while (fixup != 0)
	{
		next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = func->builder->param_area_size;
		if(DEBUG_FIXUPS)
		{
			fprintf(stderr, "Fixup Param Area Size: %lx, Value: %x\n",
					(jit_nint)fixup, fixup[0]);
		}
		fixup = next;
	}
	gen->alloca_fixup = 0;

	/* Restore the used callee saved registers */
	if(gen->stack_changed)
	{
		/* RSP may have moved (e.g. alloca), so recompute the save area
		   position relative to RBP using the same frame-size arithmetic
		   as the prolog */
		int frame_size = func->builder->frame_size;
		int regs_saved = 0;

		/* Get the number of registers we preserved */
		/* NOTE(review): counts regs 0..13 but the restore loops scan
		   0..14 — same bound mismatch as in _jit_gen_prolog; confirm
		   register 14 can never be a touched callee-saved register. */
		for(reg = 0; reg < 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				++regs_saved;
			}
		}

		/* add the register save area to the initial frame size */
		frame_size += (regs_saved << 3);

		/* Make sure that the frame size is a multiple of 16 bytes */
		/* so that the final RSP will be aligned on a 16-byte boundary. */
		frame_size = (frame_size + 0xf) & ~0xf;

		current_offset = -frame_size;

		for(reg = 0; reg <= 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, current_offset, 8);
				current_offset += 8;
			}
		}
	}
	else
	{
		/* RSP is where the prolog left it, so restore relative to RSP,
		   skipping over the param area if one was allocated below the
		   register saves */
#ifdef JIT_USE_PARAM_AREA
		if(func->builder->param_area_size > 0)
		{
			current_offset = func->builder->param_area_size;
		}
		else
		{
			current_offset = 0;
		}
#else /* !JIT_USE_PARAM_AREA */
		current_offset = 0;
#endif /* !JIT_USE_PARAM_AREA */
		for(reg = 0; reg <= 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RSP, current_offset, 8);
				current_offset += 8;
			}
		}
	}

	/* Restore stackpointer and frame register */
	x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
	x86_64_pop_reg_size(inst, X86_64_RBP, 8);

	/* and return */
	x86_64_ret(inst);

	gen->ptr = inst;
}
  2602  
  2603  /*
  2604   * Copy a small block. This generates inlined code.
  2605   *
 * Set is_aligned to zero if the source or target locations might not be
 * aligned on a 16-byte boundary, and to non-zero if both blocks are always
 * aligned.
  2609   *
  2610   * We assume that offset + size is in the range -2GB ... +2GB.
  2611   */
  2612  static unsigned char *
  2613  small_block_copy(jit_gencode_t gen, unsigned char *inst,
  2614  				 int dreg, jit_nint doffset,
  2615  				 int sreg, jit_nint soffset, jit_int size,
  2616  				 int scratch_reg, int scratch_xreg, int is_aligned)
  2617  {
  2618  	jit_nint offset = 0;
  2619  	int i;
  2620  
  2621  	/* Copy all 16 byte blocks of the struct */
  2622  	while(size >= 16)
  2623  	{
  2624  		if(is_aligned)
  2625  		{
  2626  			x86_64_movaps_reg_membase(inst, scratch_xreg,
  2627  									  sreg, soffset + offset);
  2628  			x86_64_movaps_membase_reg(inst, dreg, doffset + offset,
  2629  									  scratch_xreg);
  2630  		}
  2631  		else
  2632  		{
  2633  			x86_64_movups_reg_membase(inst, scratch_xreg,
  2634  									  sreg, soffset + offset);
  2635  			x86_64_movups_membase_reg(inst, dreg, doffset + offset,
  2636  									  scratch_xreg);
  2637  		}
  2638  		size -= 16;
  2639  		offset += 16;
  2640  	}
  2641  
  2642  	/* Now copy the rest of the struct */
  2643  	for(i = 8; i > 0; i /= 2)
  2644  	{
  2645  		if(size >= i)
  2646  		{
  2647  			x86_64_mov_reg_membase_size(inst, scratch_reg, sreg,
  2648  										soffset + offset, i);
  2649  			x86_64_mov_membase_reg_size(inst, dreg, doffset + offset,
  2650  										scratch_reg, i);
  2651  			size -= i;
  2652  			offset += i;
  2653  		}
  2654  	}
  2655  	return inst;
  2656  }
  2657  
  2658  /*
 * Copy a struct.
 * The size of the type must be <= 4 * 16 bytes.
  2661   */
  2662  static unsigned char *
  2663  small_struct_copy(jit_gencode_t gen, unsigned char *inst,
  2664  				  int dreg, jit_nint doffset,
  2665  				  int sreg, jit_nint soffset, jit_type_t type,
  2666  				  int scratch_reg, int scratch_xreg)
  2667  {
  2668  	int size = jit_type_get_size(type);
  2669  	int alignment = jit_type_get_alignment(type);
  2670  
  2671  	return small_block_copy(gen, inst, dreg, doffset,
  2672  							sreg, soffset, size, scratch_reg,
  2673  							scratch_xreg, ((alignment & 0xf) == 0));
  2674  }
  2675  
  2676  /*
  2677   * Copy a block of memory that has a specific size. All call clobbered
  2678   * registers must be unused at this point.
  2679   */
static unsigned char *
memory_copy(jit_gencode_t gen, unsigned char *inst,
			int dreg, jit_nint doffset,
			int sreg, jit_nint soffset, jit_nint size)
{
	/* Marshal the destination and source pointers into RDI and RSI (the
	   first two SysV AMD64 argument registers for the jit_memcpy call
	   below), taking care not to clobber a source that already sits in a
	   register we are about to overwrite. */
	if(dreg == X86_64_RDI)
	{
		/* Destination is already in place; move the source if needed */
		if(sreg != X86_64_RSI)
		{
			x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8);
		}
	}
	else if(dreg == X86_64_RSI)
	{
		if(sreg == X86_64_RDI)
		{
			/* The registers are swapped so we need a temporary register */
			x86_64_mov_reg_reg_size(inst, X86_64_RCX, X86_64_RSI, 8);
			x86_64_mov_reg_reg_size(inst, X86_64_RSI, X86_64_RDI, 8);
			x86_64_mov_reg_reg_size(inst, X86_64_RDI, X86_64_RCX, 8);
		}
		else
		{
			/* Move the destination out of RSI first, then load the source */
			x86_64_mov_reg_reg_size(inst, X86_64_RDI, X86_64_RSI, 8);
			if(sreg != X86_64_RSI)
			{
				x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8);
			}
		}
	}
	else
	{
		/* Neither pointer is in an argument register yet; sreg may be RDI,
		   so load RSI before overwriting RDI */
		x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8);
		x86_64_mov_reg_reg_size(inst, X86_64_RDI, dreg, 8);
	}
	/* Move the size to argument register 3 now (a 32-bit move suffices,
	   and is shorter, when the size fits in an unsigned int) */
	if((size > 0) && (size <= jit_max_uint))
	{
		x86_64_mov_reg_imm_size(inst, X86_64_RDX, size, 4);
	}
	else
	{
		x86_64_mov_reg_imm_size(inst, X86_64_RDX, size, 8);
	}
	/* Apply the offsets once the base pointers are in their final registers */
	if(soffset != 0)
	{
		x86_64_add_reg_imm_size(inst, X86_64_RSI, soffset, 8);
	}
	if(doffset != 0)
	{
		x86_64_add_reg_imm_size(inst, X86_64_RDI, doffset, 8);
	}
	/* Emit the call to jit_memcpy(dest, src, size) */
	inst = x86_64_call_code(inst, (jit_nint)jit_memcpy);
	return inst;
}
  2735  
  2736  /*
  2737   * Fill a small block. This generates inlined code.
  2738   *
 * Set is_aligned to zero if the target location might not be aligned on a
 * 16-byte boundary, and to non-zero if the block is always aligned.
  2741   *
  2742   * Set use_sse to zero to disable SSE instructions use (it will make this
  2743   * function ignore scratch_xreg). Set it to non-zero otherwise.
  2744   *
  2745   * We assume that offset + size is in the range -2GB ... +2GB.
  2746   */
  2747  static unsigned char *
  2748  small_block_set(jit_gencode_t gen, unsigned char *inst,
  2749  				int dreg, jit_nint doffset,
  2750  				jit_nuint val, jit_nint size,
  2751  				int scratch_reg, int scratch_xreg,
  2752  				int is_aligned, int use_sse)
  2753  {
  2754  	jit_nint offset = 0;
  2755  	int i;
  2756  
  2757  	/* Make sure only the least significant byte serves as the filler. */
  2758  	val &= 0xff;
  2759  
  2760  	/* Load the filler into a register. */
  2761  	if(val == 0)
  2762  	{
  2763  		if(!use_sse || (size % 16) != 0)
  2764  		{
  2765  			x86_64_clear_reg(inst, scratch_reg);
  2766  		}
  2767  	}
  2768  	else
  2769  	{
  2770  		val |= val << 8;
  2771  		val |= val << 16;
  2772  		val |= val << 32;
  2773  		x86_64_mov_reg_imm_size(inst, scratch_reg, val, 8);
  2774  	}
  2775  
  2776  	/* Fill all 16 byte blocks */
  2777  	if(use_sse)
  2778  	{
  2779  		if(val == 0)
  2780  		{
  2781  			x86_64_clear_xreg(inst, scratch_xreg);
  2782  		}
  2783  		else
  2784  		{
  2785  			x86_64_movq_xreg_reg(inst, scratch_xreg, scratch_reg);
  2786  			x86_64_movlhps(inst, scratch_xreg, scratch_xreg);
  2787  		}
  2788  
  2789  		while(size >= 16)
  2790  		{
  2791  			if(is_aligned)
  2792  			{
  2793  				x86_64_movaps_membase_reg(inst, dreg, doffset + offset,
  2794  										  scratch_xreg);
  2795  			}
  2796  			else
  2797  			{
  2798  				x86_64_movups_membase_reg(inst, dreg, doffset + offset,
  2799  										  scratch_xreg);
  2800  			}
  2801  			size -= 16;
  2802  			offset += 16;
  2803  		}
  2804  	}
  2805  
  2806  	/* Now fill the rest */
  2807  	for(i = 8; i > 0; i /= 2)
  2808  	{
  2809  		while(size >= i)
  2810  		{
  2811  			x86_64_mov_membase_reg_size(inst, dreg, doffset + offset,
  2812  										scratch_reg, i);
  2813  			size -= i;
  2814  			offset += i;
  2815  		}
  2816  	}
  2817  	return inst;
  2818  }
  2819  
/*
 * Begin output for a basic block: record its final address and resolve
 * all forward references that were waiting for it.
 */
void
_jit_gen_start_block(jit_gencode_t gen, jit_block_t block)
{
	jit_int *fixup;
	jit_int *next;
	void **absolute_fixup;
	void **absolute_next;

	/* Set the address of this block */
	block->address = (void *)(gen->ptr);

	/* If this block has pending fixups, then apply them now.  The list is
	   threaded through the emitted code: each 32-bit slot holds the offset
	   to the next fixup and is overwritten with the relative displacement
	   to the block address. */
	fixup = (jit_int *)(block->fixup_list);
	if(DEBUG_FIXUPS && fixup)
	{
		fprintf(stderr, "Block: %lx\n", (jit_nint)block);
	}
	while(fixup != 0)
	{
		if(DEBUG_FIXUPS)
		{
			fprintf(stderr, "Fixup Address: %lx, Value: %x\n",
					(jit_nint)fixup, fixup[0]);
		}
		/* Read the link to the next fixup before overwriting the slot */
		next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = (jit_int)
			(((jit_nint)(block->address)) - ((jit_nint)fixup) - 4);
		fixup = next;
	}
	block->fixup_list = 0;

	/* Absolute fixups contain complete pointers: each slot links to the
	   next fixup and is overwritten with the block's absolute address */
	absolute_fixup = (void**)(block->fixup_absolute_list);
	while(absolute_fixup != 0)
	{
		absolute_next = (void **)(absolute_fixup[0]);
		absolute_fixup[0] = (void *)((jit_nint)(block->address));
		absolute_fixup = absolute_next;
	}
	block->fixup_absolute_list = 0;
}
  2861  
/*
 * End output for a basic block.  This backend keeps no per-block state,
 * so there is nothing to clean up.
 */
void
_jit_gen_end_block(jit_gencode_t gen, jit_block_t block)
{
	/* Nothing to do here for x86-64 */
}
  2867  
  2868  int
  2869  _jit_gen_is_global_candidate(jit_type_t type)
  2870  {
  2871  	switch(jit_type_remove_tags(type)->kind)
  2872  	{
  2873  		case JIT_TYPE_SBYTE:
  2874  		case JIT_TYPE_UBYTE:
  2875  		case JIT_TYPE_SHORT:
  2876  		case JIT_TYPE_USHORT:
  2877  		case JIT_TYPE_INT:
  2878  		case JIT_TYPE_UINT:
  2879  		case JIT_TYPE_LONG:
  2880  		case JIT_TYPE_ULONG:
  2881  		case JIT_TYPE_NINT:
  2882  		case JIT_TYPE_NUINT:
  2883  		case JIT_TYPE_PTR:
  2884  		case JIT_TYPE_SIGNATURE:
  2885  		{
  2886  			return 1;
  2887  		}
  2888  	}
  2889  	return 0;
  2890  }
  2891  
  2892  /*
  2893   * Do the stuff usually handled in jit-rules.c for native implementations
  2894   * here too because the common implementation is not enough for x86_64.
  2895   */
  2896  
  2897  /*
  2898   * Determine if a type corresponds to a structure or union.
  2899   */
  2900  static int
  2901  is_struct_or_union(jit_type_t type)
  2902  {
  2903  	type = jit_type_normalize(type);
  2904  	if(type)
  2905  	{
  2906  		if(type->kind == JIT_TYPE_STRUCT || type->kind == JIT_TYPE_UNION)
  2907  		{
  2908  			return 1;
  2909  		}
  2910  	}
  2911  	return 0;
  2912  }
  2913  
  2914  static int
  2915  _jit_classify_struct_return(jit_param_passing_t *passing,
  2916  					_jit_param_t *param, jit_type_t return_type)
  2917  {
  2918  	/* Initialize the param passing structure */
  2919  	jit_memset(passing, 0, sizeof(jit_param_passing_t));
  2920  	jit_memset(param, 0, sizeof(_jit_param_t));
  2921  
  2922  	passing->word_regs = _jit_word_return_regs;
  2923  	passing->max_word_regs = _jit_num_word_return_regs;
  2924  	passing->float_regs = _jit_sse_return_regs;
  2925  	passing->max_float_regs = _jit_num_sse_return_regs;
  2926  
  2927  	if(!(_jit_classify_struct(passing, param, return_type)))
  2928  	{
  2929  		return 0;
  2930  	}
  2931  
  2932  	return 1;
  2933  }
  2934  
  2935  /*
  2936   * Load a struct to the register(s) in which it will be returned.
  2937   */
static unsigned char *
return_struct(unsigned char *inst, jit_function_t func, int ptr_reg)
{
	jit_type_t return_type;
	jit_type_t signature = jit_function_get_signature(func);

	return_type = jit_type_get_return(signature);
	if(is_struct_or_union(return_type))
	{
		jit_nuint size;
		jit_param_passing_t passing;
		_jit_param_t return_param;

		/* Find out which register(s) the struct is returned in */
		if(!_jit_classify_struct_return(&passing, &return_param,
										return_type))
		{
			/* It's an error so simply return insn */
			return inst;
		}

		size = jit_type_get_size(return_type);
		if(size <= 8)
		{
			/* one register is used for returning the value */
			if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				/* Load 4 or 8 bytes from *ptr_reg into the integer register */
				if(size <= 4)
				{
					x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 4);
				}
				else
				{
					x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 8);
				}
			}
			else
			{
				/* The single return register is an xmm register */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(size <= 4)
				{
					x86_64_movss_reg_regp(inst, reg, ptr_reg);
				}
				else
				{
					x86_64_movsd_reg_regp(inst, reg, ptr_reg);
				}
			}
		}
		else
		{
			/* In this case we might need up to two registers */
			if(return_param.arg_class == 1)
			{
				/* This must be one xmm register */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;
				int alignment = jit_type_get_alignment(return_type);

				if((alignment & 0xf) == 0)
				{
					/* The type is aligned on a 16 byte boundary */
					x86_64_movaps_reg_regp(inst, reg, ptr_reg);
				}
				else
				{
					x86_64_movups_reg_regp(inst, reg, ptr_reg);
				}
			}
			else
			{
				/* Two registers: the first eightbyte comes from *ptr_reg,
				   the second one from *(ptr_reg + 8) */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
				{
					x86_64_mov_reg_regp_size(inst, reg,
											 ptr_reg, 8);
				}
				else
				{
					x86_64_movsd_reg_regp(inst, reg, ptr_reg);
				}
				size -= 8;
				/* The remaining part might be of any size <= 8 */
				reg = _jit_reg_info[return_param.un.reg_info[1].reg].cpu_reg;
				if(IS_GENERAL_REG(return_param.un.reg_info[1].reg))
				{
					if(size <= 4)
					{
						x86_64_mov_reg_membase_size(inst, reg, ptr_reg,
													8, 4);
					}
					else
					{
						x86_64_mov_reg_membase_size(inst, reg, ptr_reg,
													8, 8);
					}
				}
				else
				{
					if(size <= 4)
					{
						x86_64_movss_reg_membase(inst, reg,
												 ptr_reg, 8);
					}
					else
					{
						x86_64_movsd_reg_membase(inst, reg,
												 ptr_reg, 8);
					}
				}
			}
		}
	}
	/* Return the updated instruction-emit position */
	return inst;
}
  3054  
  3055  /*
  3056   * Flush a struct return value from the registers to the value
  3057   * on the stack.
  3058   */
static unsigned char *
flush_return_struct(unsigned char *inst, jit_value_t value)
{
	jit_type_t return_type;

	return_type = jit_value_get_type(value);
	if(is_struct_or_union(return_type))
	{
		jit_nuint size;
		jit_nint offset;
		jit_param_passing_t passing;
		_jit_param_t return_param;

		/* Find out which register(s) hold the returned struct */
		if(!_jit_classify_struct_return(&passing, &return_param, return_type))
		{
			/* It's an error so simply return insn */
			return inst;
		}

		return_param.value = value;

		/* Make sure the value has a frame slot to flush into */
		_jit_gen_fix_value(value);
		size = jit_type_get_size(return_type);
		offset = value->frame_offset;
		if(size <= 8)
		{
			/* one register is used for returning the value */
			if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				/* Store 4 or 8 bytes at rbp+offset from the integer register */
				if(size <= 4)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 4);
				}
				else
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 8);
				}
			}
			else
			{
				/* The single return register is an xmm register */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(size <= 4)
				{
					x86_64_movss_membase_reg(inst, X86_64_RBP, offset, reg);
				}
				else
				{
					x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg);
				}
			}
		}
		else
		{
			/* In this case we might need up to two registers */
			if(return_param.arg_class == 1)
			{
				/* This must be one xmm register */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;
				int alignment = jit_type_get_alignment(return_type);

				if((alignment & 0xf) == 0)
				{
					/* The type is aligned on a 16 byte boundary */
					x86_64_movaps_membase_reg(inst, X86_64_RBP, offset, reg);
				}
				else
				{
					x86_64_movups_membase_reg(inst, X86_64_RBP, offset, reg);
				}
			}
			else
			{
				/* Two registers: the first eightbyte goes to rbp+offset,
				   the second one to rbp+offset+8 */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
												reg, 8);
				}
				else
				{
					x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg);
				}
				size -= 8;
				/* The remaining part might be of any size <= 8 */
				reg = _jit_reg_info[return_param.un.reg_info[1].reg].cpu_reg;
				if(IS_GENERAL_REG(return_param.un.reg_info[1].reg))
				{
					if(size <= 4)
					{
						x86_64_mov_membase_reg_size(inst, X86_64_RBP,
													offset + 8, reg, 4);
					}
					else
					{
						x86_64_mov_membase_reg_size(inst, X86_64_RBP,
													offset + 8, reg, 8);
					}
				}
				else
				{
					if(size <= 4)
					{
						x86_64_movss_membase_reg(inst, X86_64_RBP,
												 offset + 8, reg);
					}
					else
					{
						x86_64_movsd_membase_reg(inst, X86_64_RBP,
												 offset + 8, reg);
					}
				}
			}
		}
	}
	/* Return the updated instruction-emit position */
	return inst;
}
  3178  
/*
 * Generate native code for a single instruction.  The actual per-opcode
 * emitters live in "jit-rules-x86-64.inc", which expands to the case
 * labels of this switch.  Unimplemented opcodes fall through to the
 * default case and print a TODO diagnostic instead of emitting code.
 */
void
_jit_gen_insn(jit_gencode_t gen, jit_function_t func,
			  jit_block_t block, jit_insn_t insn)
{
	switch(insn->opcode)
	{
	#define JIT_INCLUDE_RULES
	#include "jit-rules-x86-64.inc"
	#undef JIT_INCLUDE_RULES

	default:
		{
			fprintf(stderr, "TODO(%x) at %s, %d\n",
				(int)(insn->opcode), __FILE__, (int)__LINE__);
		}
		break;
	}
}
  3197  
  3198  /*
  3199   * Fixup the passing area after all parameters have been allocated either
  3200   * in registers or on the stack.
  3201   * This is typically used for adding pad words for keeping the stack aligned.
  3202   */
  3203  void
  3204  _jit_fix_call_stack(jit_param_passing_t *passing)
  3205  {
  3206  	if((passing->stack_size & 0x0f) != 0)
  3207  	{
  3208  		passing->stack_size = (passing->stack_size + 0x0f) & ~((jit_nint)0x0f);
  3209  		passing->stack_pad = 1;
  3210  	}
  3211  }
  3212  
  3213  #ifndef JIT_USE_PARAM_AREA
  3214  /*
  3215   * Setup the call stack before pushing any parameters.
  3216   * This is used usually for pushing pad words for alignment.
  3217   * The function is needed only if the backend doesn't work with the
  3218   * parameter area.
  3219   */
  3220  int
  3221  _jit_setup_call_stack(jit_function_t func, jit_param_passing_t *passing)
  3222  {
  3223  	if(passing->stack_pad)
  3224  	{
  3225  		int current;
  3226  		jit_value_t pad_value;
  3227  
  3228  		pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0);
  3229  		if(!pad_value)
  3230  		{
  3231  			return 0;
  3232  		}
  3233  		for(current = 0; current < passing->stack_pad; ++current)
  3234  		{
  3235  			if(!jit_insn_push(func, pad_value))
  3236  			{
  3237  				return 0;
  3238  			}
  3239  		}
  3240  	}
  3241  	return 1;
  3242  }
  3243  #endif /* !JIT_USE_PARAM_AREA */
  3244  
  3245  /*
  3246   * Push a parameter onto the stack.
  3247   */
static int
push_param(jit_function_t func, _jit_param_t *param, jit_type_t type)
{
	/* Structs whose value is not itself a struct are passed by taking
	   the address of the value and copying through the pointer */
	if(is_struct_or_union(type) && !is_struct_or_union(param->value->type))
	{
		jit_value_t value;

		if(!(value = jit_insn_address_of(func, param->value)))
		{
			return 0;
		}
	#ifdef JIT_USE_PARAM_AREA
		/* Copy the value into the outgoing parameter area, by pointer */
		if(!jit_insn_set_param_ptr(func, value, type, param->un.offset))
		{
			return 0;
		}
	#else
		/* Push the parameter value onto the stack, by pointer */
		if(!jit_insn_push_ptr(func, value, type))
		{
			return 0;
		}
		/* Push any alignment pad words that follow this parameter */
		if(param->stack_pad)
		{
			int current;
			jit_value_t pad_value;

			pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0);
			if(!pad_value)
			{
				return 0;
			}
			for(current = 0; current < param->stack_pad; ++current)
			{
				if(!jit_insn_push(func, pad_value))
				{
					return 0;
				}
			}
		}
	#endif
	}
	else
	{
	#ifdef JIT_USE_PARAM_AREA
		/* Copy the value into the outgoing parameter area */
		if(!jit_insn_set_param(func, param->value, param->un.offset))
		{
			return 0;
		}
	#else
		/* Push the parameter value onto the stack */
		if(!jit_insn_push(func, param->value))
		{
			return 0;
		}
		/* Push any alignment pad words that follow this parameter */
		if(param->stack_pad)
		{
			int current;
			jit_value_t pad_value;

			pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0);
			if(!pad_value)
			{
				return 0;
			}
			for(current = 0; current < param->stack_pad; ++current)
			{
				if(!jit_insn_push(func, pad_value))
				{
					return 0;
				}
			}
		}
	#endif
	}
	return 1;
}
  3327  
/*
 * Prepare the value(s) that will be moved into registers for a
 * register-passed parameter.  arg_class == 1 means a single register
 * and the value is used directly; arg_class == 2 means the value is a
 * struct split into two eightbytes, each loaded from memory with a
 * type matching its destination register class.  Returns zero on
 * failure.
 */
int
_jit_setup_reg_param(jit_function_t func, _jit_param_t *param,
					 jit_type_t param_type)
{
	if(param->arg_class == 1)
	{
		param->un.reg_info[0].value = param->value;
	}
	else if(param->arg_class == 2)
	{
		jit_nint size = jit_type_get_size(param_type);
		jit_value_t value_ptr;

		/* Take the struct's address so the eightbytes can be loaded */
		if(!(value_ptr = jit_insn_address_of(func, param->value)))
		{
			return 0;
		}
		/* The first eightbyte is always full: load it as a long or
		   a float64 depending on the register class */
		if(IS_GENERAL_REG(param->un.reg_info[0].reg))
		{
			param->un.reg_info[0].value =
				jit_insn_load_relative(func, value_ptr, 0, jit_type_long);
			if(!(param->un.reg_info[0].value))
			{
				return 0;
			}
		}
		else
		{
			param->un.reg_info[0].value =
				jit_insn_load_relative(func, value_ptr, 0, jit_type_float64);
			if(!(param->un.reg_info[0].value))
			{
				return 0;
			}
		}
		size -= 8;
		/* The second eightbyte may be partial: pick a 4- or 8-byte
		   load type based on the remaining size */
		if(IS_GENERAL_REG(param->un.reg_info[1].reg))
		{
			if(size <= 4)
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_int);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
			else
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_long);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
		}
		else
		{
			if(size <= 4)
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_float32);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
			else
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_float64);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
		}
	}
	return 1;
}
  3409  
  3410  int
  3411  _jit_flush_incoming_struct(jit_function_t func, _jit_param_t *param,
  3412  						  jit_type_t param_type)
  3413  {
  3414  	if(param->arg_class == 2)
  3415  	{
  3416  		jit_value_t address;
  3417  
  3418  		/* Now store the two values in place */
  3419  		if(!(address = jit_insn_address_of(func, param->value)))
  3420  		{
  3421  			return 0;
  3422  		}
  3423  		if(!jit_insn_store_relative(func, address, 0, param->un.reg_info[0].value))
  3424  		{
  3425  			return 0;
  3426  		}
  3427  		if(!jit_insn_store_relative(func, address, 8, param->un.reg_info[1].value))
  3428  		{
  3429  			return 0;
  3430  		}
  3431  	}
  3432  	return 1;
  3433  }
  3434  
/*
 * Bind an incoming parameter to the location in which it arrives:
 * a frame offset for stack-passed parameters, or one or two incoming
 * registers otherwise.  For structs passed in two registers, temporary
 * values of matching types are created for each eightbyte; they are
 * flushed into the struct later by _jit_flush_incoming_struct.
 * Returns zero on failure.
 */
int
_jit_setup_incoming_param(jit_function_t func, _jit_param_t *param,
						  jit_type_t param_type)
{
	if(param->arg_class == JIT_ARG_CLASS_STACK)
	{
		/* The parameter is passed on the stack */
		if(!jit_insn_incoming_frame_posn
				(func, param->value, param->un.offset))
		{
			return 0;
		}
	}
	else
	{
		param_type = jit_type_remove_tags(param_type);

		switch(param_type->kind)
		{
			case JIT_TYPE_STRUCT:
			case JIT_TYPE_UNION:
			{
				if(param->arg_class == 1)
				{
					/* The whole struct arrives in a single register */
					if(!jit_insn_incoming_reg(func, param->value, param->un.reg_info[0].reg))
					{
						return 0;
					}
				}
				else
				{
					/* These cases have to be handled specially */
					/* The struct is passed in two registers */
					jit_nuint size = jit_type_get_size(param_type);

					/* The first part is allways a full eightbyte */
					if(IS_GENERAL_REG(param->un.reg_info[0].reg))
					{
						if(!(param->un.reg_info[0].value = jit_value_create(func, jit_type_long)))
						{
							return 0;
						}
					}
					else
					{
						if(!(param->un.reg_info[0].value = jit_value_create(func, jit_type_float64)))
						{
							return 0;
						}
					}
					size -= 8;
					/* The second part might be of any size <= 8 */
					if(IS_GENERAL_REG(param->un.reg_info[1].reg))
					{
						if(size <= 4)
						{
							if(!(param->un.reg_info[1].value =
									jit_value_create(func, jit_type_int)))
							{
								return 0;
							}
						}
						else
						{
							if(!(param->un.reg_info[1].value =
									jit_value_create(func, jit_type_long)))
							{
								return 0;
							}
						}
					}
					else
					{
						if(size <= 4)
						{
							if(!(param->un.reg_info[1].value =
									jit_value_create(func, jit_type_float32)))
							{
								return 0;
							}
						}
						else
						{
							if(!(param->un.reg_info[1].value =
									jit_value_create(func, jit_type_float64)))
							{
								return 0;
							}
						}
					}
					/* Bind both temporaries to their incoming registers */
					if(!jit_insn_incoming_reg(func,
											  param->un.reg_info[0].value,
											  param->un.reg_info[0].reg))
					{
						return 0;
					}
					if(!jit_insn_incoming_reg(func,
											  param->un.reg_info[1].value,
											  param->un.reg_info[1].reg))
					{
						return 0;
					}
				}
			}
			break;

			default:
			{
				/* Non-struct types always occupy exactly one register */
				if(!jit_insn_incoming_reg(func, param->value, param->un.reg_info[0].reg))
				{
					return 0;
				}
			}
			break;
		}
	}
	return 1;
}
  3553  
  3554  int
  3555  _jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param,
  3556  						  jit_type_t param_type)
  3557  {
  3558  	if(param->arg_class == JIT_ARG_CLASS_STACK)
  3559  	{
  3560  		/* The parameter is passed on the stack */
  3561  		if(!push_param(func, param, param_type))
  3562  		{
  3563  			return 0;
  3564  		}
  3565  	}
  3566  	else
  3567  	{
  3568  		if(!jit_insn_outgoing_reg(func, param->un.reg_info[0].value,
  3569  										param->un.reg_info[0].reg))
  3570  		{
  3571  			return 0;
  3572  		}
  3573  		if(param->arg_class == 2)
  3574  		{
  3575  			if(!jit_insn_outgoing_reg(func, param->un.reg_info[1].value,
  3576  											param->un.reg_info[1].reg))
  3577  			{
  3578  				return 0;
  3579  			}
  3580  		}
  3581  	}
  3582  	return 1;
  3583  }
  3584  
/*
 * Create the instructions that place the function's return value in
 * its ABI-mandated location before returning.  Returns zero on failure.
 */
int
_jit_setup_return_value(jit_function_t func, jit_value_t return_value,
						jit_type_t return_type)

{
	/* Structure values must be flushed into the frame, and
	   everything else ends up in a register */
	if(is_struct_or_union(return_type))
	{
		jit_param_passing_t passing;
		_jit_param_t return_param;

		if(!_jit_classify_struct_return(&passing, &return_param, return_type))
		{
			/* It's an error so simply return insn */
			return 0;
		}

		if(return_param.arg_class == 1)
		{
			/* The whole struct fits in a single return register */
			if(!jit_insn_return_reg(func, return_value,
									return_param.un.reg_info[0].reg))
			{
				return 0;
			}
		}
		else
		{
			/* Two-register struct: flush it to the frame instead */
			if(!jit_insn_flush_struct(func, return_value))
			{
				return 0;
			}
		}
	}
	else if(return_type == jit_type_float32 ||
			return_type == jit_type_float64)
	{
		/* float32/float64 values are returned in xmm0 */
		if(!jit_insn_return_reg(func, return_value, X86_64_REG_XMM0))
		{
			return 0;
		}
	}
	else if(return_type == jit_type_nfloat)
	{
		/* native floats are returned on the x87 stack in st0 */
		if(!jit_insn_return_reg(func, return_value, X86_64_REG_ST0))
		{
			return 0;
		}
	}
	else if(return_type->kind != JIT_TYPE_VOID)
	{
		/* All remaining (integer/pointer) types are returned in rax */
		if(!jit_insn_return_reg(func, return_value, X86_64_REG_RAX))
		{
			return 0;
		}
	}
	return 1;
}
  3643  
  3644  void
  3645  _jit_init_args(int abi, jit_param_passing_t *passing)
  3646  {
  3647  	passing->max_word_regs = _jit_num_word_regs;
  3648  	passing->word_regs = _jit_word_arg_regs;
  3649  	passing->max_float_regs = _jit_num_float_regs;
  3650  	passing->float_regs = _jit_float_arg_regs;
  3651  }
  3652  
  3653  int
  3654  _jit_create_entry_insns(jit_function_t func)
  3655  {
  3656  	jit_value_t value;
  3657  	int has_struct_return = 0;
  3658  	jit_type_t signature = func->signature;
  3659  	int abi = jit_type_get_abi(signature);
  3660  	unsigned int num_args = jit_type_num_params(signature);
  3661  	jit_param_passing_t passing;
  3662  	_jit_param_t param[num_args];
  3663  	_jit_param_t nested_param;
  3664  	_jit_param_t struct_return_param;
  3665  	int current_param;
  3666  
  3667  	/* Reset the local variable frame size for this function */
  3668  	func->builder->frame_size = JIT_INITIAL_FRAME_SIZE;
  3669  
  3670  	/* Initialize the param passing structure */
  3671  	jit_memset(&passing, 0, sizeof(jit_param_passing_t));
  3672  	jit_memset(param, 0, sizeof(_jit_param_t) * num_args);
  3673  
  3674  	passing.params = param;
  3675  	passing.stack_size = JIT_INITIAL_STACK_OFFSET;
  3676  
  3677  	/* Let the specific backend initialize it's part of the params */
  3678  	_jit_init_args(abi, &passing);
  3679  
  3680  	/* Allocate the structure return pointer */
  3681  	if((value = jit_value_get_struct_pointer(func)))
  3682  	{
  3683  		jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
  3684  		if(!(_jit_classify_param(&passing, &struct_return_param,
  3685  								 jit_type_void_ptr)))
  3686  		{
  3687  			return 0;
  3688  		}
  3689  		struct_return_param.value = value;
  3690  		has_struct_return = 1;
  3691  	}
  3692  
  3693  	/* If the function is nested, then we need an extra parameter
  3694  	   to pass the pointer to the parent's local variable frame */
  3695  	if(func->nested_parent)
  3696  	{
  3697  		jit_memset(&nested_param, 0, sizeof(_jit_param_t));
  3698  		if(!(_jit_classify_param(&passing, &nested_param,
  3699  								 jit_type_void_ptr)))
  3700  		{
  3701  			return 0;
  3702  		}
  3703  
  3704  		nested_param.value = jit_value_create(func, jit_type_void_ptr);
  3705  		jit_function_set_parent_frame(func, nested_param.value);
  3706  	}
  3707  
  3708  	/* Let the backend classify the parameters */
  3709  	for(current_param = 0; current_param < num_args; current_param++)
  3710  	{
  3711  		jit_type_t param_type;
  3712  
  3713  		param_type = jit_type_get_param(signature, current_param);
  3714  		param_type = jit_type_normalize(param_type);
  3715  
  3716  		if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
  3717  								 param_type)))
  3718  		{
  3719  			return 0;
  3720  		}
  3721  	}
  3722  
  3723  	/* Now we can setup the incoming parameters */
  3724  	for(current_param = 0; current_param < num_args; current_param++)
  3725  	{
  3726  		jit_type_t param_type;
  3727  
  3728  		param_type = jit_type_get_param(signature, current_param);
  3729  		if(!(param[current_param].value))
  3730  		{
  3731  			if(!(param[current_param].value = jit_value_get_param(func, current_param)))
  3732  			{
  3733  				return 0;
  3734  			}
  3735  		}
  3736  		if(!_jit_setup_incoming_param(func, &(param[current_param]), param_type))
  3737  		{
  3738  			return 0;
  3739  		}
  3740  	}
  3741  
  3742  	if(func->nested_parent)
  3743  	{
  3744  		if(!_jit_setup_incoming_param(func, &nested_param, jit_type_void_ptr))
  3745  		{
  3746  			return 0;
  3747  		}
  3748  	}
  3749  
  3750  	if(has_struct_return)
  3751  	{
  3752  		if(!_jit_setup_incoming_param(func, &struct_return_param, jit_type_void_ptr))
  3753  		{
  3754  			return 0;
  3755  		}
  3756  	}
  3757  
  3758  	/* Now we flush the incoming structs passed in registers */
  3759  	for(current_param = 0; current_param < num_args; current_param++)
  3760  	{
  3761  		if(param[current_param].arg_class != JIT_ARG_CLASS_STACK)
  3762  		{
  3763  			jit_type_t param_type;
  3764  
  3765  			param_type = jit_type_get_param(signature, current_param);
  3766  			if(!_jit_flush_incoming_struct(func, &(param[current_param]),
  3767  										   param_type))
  3768  			{
  3769  				return 0;
  3770  			}
  3771  		}
  3772  	}
  3773  
  3774  	return 1;
  3775  }
  3776  
/*
 * Create the instructions that set up the arguments for a call.
 * Parameters (plus the hidden struct-return pointer and the parent
 * frame pointer for nested calls) are classified first, the stack
 * area is aligned, then stack-passed arguments are pushed in reverse
 * order, register values are prepared, and finally the registers are
 * assigned.  On success *struct_return holds the value that receives
 * a struct return (or 0).  The flags argument is not used here.
 * Returns zero on failure.
 */
int _jit_create_call_setup_insns
	(jit_function_t func, jit_type_t signature,
	 jit_value_t *args, unsigned int num_args,
	 int is_nested, jit_value_t parent_frame,
	 jit_value_t *struct_return, int flags)
{
	int abi = jit_type_get_abi(signature);
	jit_type_t return_type;
	jit_value_t value;
	jit_value_t return_ptr;
	int current_param;
	jit_param_passing_t passing;
	_jit_param_t param[num_args];
	_jit_param_t nested_param;
	_jit_param_t struct_return_param;

	/* Initialize the param passing structure */
	jit_memset(&passing, 0, sizeof(jit_param_passing_t));
	jit_memset(param, 0, sizeof(_jit_param_t) * num_args);

	passing.params = param;
	passing.stack_size = 0;

	/* Let the specific backend initialize it's part of the params */
	_jit_init_args(abi, &passing);

	/* Determine if we need an extra hidden parameter for returning a
	   structure */
	return_type = jit_type_get_return(signature);
	if(jit_type_return_via_pointer(return_type))
	{
		value = jit_value_create(func, return_type);
		if(!value)
		{
			return 0;
		}
		*struct_return = value;
		return_ptr = jit_insn_address_of(func, value);
		if(!return_ptr)
		{
			return 0;
		}
		jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
		struct_return_param.value = return_ptr;
		if(!(_jit_classify_param(&passing, &struct_return_param,
								 jit_type_void_ptr)))
		{
			return 0;
		}
	}
	else
	{
		*struct_return = 0;
		return_ptr = 0;
	}

	/* Determine how many parameters are going to end up in word registers,
	   and compute the largest stack size needed to pass stack parameters */
	if(is_nested)
	{
		jit_memset(&nested_param, 0, sizeof(_jit_param_t));
		if(!(_jit_classify_param(&passing, &nested_param,
								 jit_type_void_ptr)))
		{
			return 0;
		}

		nested_param.value = parent_frame;
	}

	/* Let the backend classify the parameters */
	for(current_param = 0; current_param < num_args; current_param++)
	{
		jit_type_t param_type;

		param_type = jit_type_get_param(signature, current_param);
		param_type = jit_type_normalize(param_type);

		if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
								 param_type)))
		{
			return 0;
		}
		/* Set the argument value */
		passing.params[current_param].value = args[current_param];
	}

	/* Let the backend do final adjustments to the passing area */
	_jit_fix_call_stack(&passing);

#ifdef JIT_USE_PARAM_AREA
	/* Grow the shared outgoing parameter area if this call needs more */
	if(passing.stack_size > func->builder->param_area_size)
	{
		func->builder->param_area_size = passing.stack_size;
	}
#else
	/* Flush deferred stack pops from previous calls if too many
	   parameters have collected up on the stack since last time */
	if(!jit_insn_flush_defer_pop(func, 32 - passing.stack_size))
	{
		return 0;
	}

	if(!_jit_setup_call_stack(func, &passing))
	{
		return 0;
	}
#endif

	/* Now setup the arguments on the stack or in the registers in reverse order */
	/* First process the params passed on the stack */
	current_param = num_args;
	while(current_param > 0)
	{
		--current_param;
		if(param[current_param].arg_class == JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_setup_outgoing_param(func, &(param[current_param]), param_type))
			{
				return 0;
			}
		}
	}

	/* Handle the structure return pointer if it's passed on the stack */
	if(return_ptr)
	{
		if(struct_return_param.arg_class == JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &struct_return_param,
										  jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Handle the parent's frame pointer if it's passed on the stack */
	if(is_nested)
	{
		if(nested_param.arg_class == JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &nested_param,
										  jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Now setup the values passed in registers */
	current_param = num_args;
	while(current_param > 0)
	{
		--current_param;

		if(param[current_param].arg_class != JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_setup_reg_param(func, &(param[current_param]), param_type))
			{
				return 0;
			}
		}
	}

	/* Handle the parent's frame pointer if required */
	if(is_nested)
	{
		if(nested_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_reg_param(func, &nested_param,
									 jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Handle the structure return pointer if required */
	if(return_ptr)
	{
		if(struct_return_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_reg_param(func, &struct_return_param,
									 jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* And finally assign the registers */
	current_param = num_args;
	while(current_param > 0)
	{
		--current_param;
		if(param[current_param].arg_class != JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_setup_outgoing_param(func, &(param[current_param]),
										  param_type))
			{
				return 0;
			}
		}
	}

	/* Handle the parent's frame pointer if required */
	if(is_nested)
	{
		if(nested_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &nested_param,
									 jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Add the structure return pointer if required */
	if(return_ptr)
	{
		if(struct_return_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &struct_return_param,
										  jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	return 1;
}
  4020  
  4021  int
  4022  _jit_create_call_return_insns(jit_function_t func, jit_type_t signature,
  4023  							  jit_value_t *args, unsigned int num_args,
  4024  							  jit_value_t return_value, int is_nested)
  4025  {
  4026  	jit_type_t return_type;
  4027  	int ptr_return;
  4028  #ifndef JIT_USE_PARAM_AREA
  4029  	int abi = jit_type_get_abi(signature);
  4030  	int current_param;
  4031  	jit_param_passing_t passing;
  4032  	_jit_param_t param[num_args];
  4033  	_jit_param_t nested_param;
  4034  	_jit_param_t struct_return_param;
  4035  #endif /* !JIT_USE_PARAM_AREA */
  4036  
  4037  	return_type = jit_type_normalize(jit_type_get_return(signature));
  4038  	ptr_return = jit_type_return_via_pointer(return_type);
  4039  #ifndef JIT_USE_PARAM_AREA
  4040  	/* Initialize the param passing structure */
  4041  	jit_memset(&passing, 0, sizeof(jit_param_passing_t));
  4042  	jit_memset(param, 0, sizeof(_jit_param_t) * num_args);
  4043  
  4044  	passing.params = param;
  4045  	passing.stack_size = 0;
  4046  
  4047  	/* Let the specific backend initialize it's part of the params */
  4048  	_jit_init_args(abi, &passing);
  4049  
  4050  	/* Determine how many parameters are going to end up in word registers,
  4051  	   and compute the largest stack size needed to pass stack parameters */
  4052  	if(is_nested)
  4053  	{
  4054  		jit_memset(&nested_param, 0, sizeof(_jit_param_t));
  4055  		if(!(_jit_classify_param(&passing, &nested_param,
  4056  								 jit_type_void_ptr)))
  4057  		{
  4058  			return 0;
  4059  		}
  4060  	}
  4061  
  4062  	/* Determine if we need an extra hidden parameter for returning a
  4063  	   structure */
  4064  	if(ptr_return)
  4065  	{
  4066  		jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
  4067  		if(!(_jit_classify_param(&passing, &struct_return_param,
  4068  								 jit_type_void_ptr)))
  4069  		{
  4070  			return 0;
  4071  		}
  4072  	}
  4073  
  4074  	/* Let the backend classify the parameters */
  4075  	for(current_param = 0; current_param < num_args; current_param++)
  4076  	{
  4077  		jit_type_t param_type;
  4078  
  4079  		param_type = jit_type_get_param(signature, current_param);
  4080  		param_type = jit_type_normalize(param_type);
  4081  
  4082  		if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
  4083  								 param_type)))
  4084  		{
  4085  			return 0;
  4086  		}
  4087  	}
  4088  
  4089  	/* Let the backend do final adjustments to the passing area */
  4090  	_jit_fix_call_stack(&passing);
  4091  
  4092  	/* Pop the bytes from the system stack */
  4093  	if(passing.stack_size > 0)
  4094  	{
  4095  		if(!jit_insn_defer_pop_stack(func, passing.stack_size))
  4096  		{
  4097  			return 0;
  4098  		}
  4099  	}
  4100  #endif /* !JIT_USE_PARAM_AREA */
  4101  
  4102  	/* Bail out now if we don't need to worry about return values */
  4103  	if(!return_value || ptr_return)
  4104  	{
  4105  		return 1;
  4106  	}
  4107  
  4108  	if(!_jit_setup_return_value(func, return_value, return_type))
  4109  	{
  4110  		return 0;
  4111  	}
  4112  
  4113  	/* Everything is back where it needs to be */
  4114  	return 1;
  4115  }
  4116  
  4117  #endif /* JIT_BACKEND_X86_64 */