github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-apply-x86-64.c (about)

     1  /*
     2   * jit-apply-x86-64.c - Apply support routines for x86_64.
     3   *
     4   * Copyright (C) 2008  Southern Storm Software, Pty Ltd.
     5   *
     6   * This file is part of the libjit library.
     7   *
     8   * The libjit library is free software: you can redistribute it and/or
     9   * modify it under the terms of the GNU Lesser General Public License
    10   * as published by the Free Software Foundation, either version 2.1 of
    11   * the License, or (at your option) any later version.
    12   *
    13   * The libjit library is distributed in the hope that it will be useful,
    14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    16   * Lesser General Public License for more details.
    17   *
    18   * You should have received a copy of the GNU Lesser General Public
    19   * License along with the libjit library.  If not, see
    20   * <http://www.gnu.org/licenses/>.
    21   */
    22  
    23  #include "jit-internal.h"
    24  #include "jit-apply-rules.h"
    25  #include "jit-apply-func.h"
    26  
    27  #if defined(__amd64) || defined(__amd64__) || defined(_x86_64) || defined(_x86_64__)
    28  
    29  #include "jit-gen-x86-64.h"
    30  
    31  /*
    32   * X86_64 argument types as specified in the X86_64 SysV ABI.
    33   */
    34  #define X86_64_ARG_NO_CLASS		0x00
    35  #define X86_64_ARG_INTEGER		0x01
    36  #define X86_64_ARG_MEMORY		0x02
    37  #define X86_64_ARG_SSE			0x11
    38  #define X86_64_ARG_SSEUP		0x12
    39  #define X86_64_ARG_X87			0x21
    40  #define X86_64_ARG_X87UP		0x22
    41  
    42  #define X86_64_ARG_IS_SSE(arg)	(((arg) & 0x10) != 0)
    43  #define X86_64_ARG_IS_X87(arg)	(((arg) & 0x20) != 0)
    44  
    45  
    46  void _jit_create_closure(unsigned char *buf, void *func,
    47                           void *closure, void *_type)
    48  {
    49  	jit_nint offset;
    50  
    51  	/* Set up the local stack frame */
    52  	x86_64_push_reg_size(buf, X86_64_RBP, 8);
    53  	x86_64_mov_reg_reg_size(buf, X86_64_RBP, X86_64_RSP, 8);
    54  
    55  	/* Create the apply argument block on the stack */
    56  	x86_64_sub_reg_imm_size(buf, X86_64_RSP, 192, 8);
    57  
    58  	/* fill the apply buffer */
    59  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x08, X86_64_RDI, 8);
    60  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x10, X86_64_RSI, 8);
    61  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x18, X86_64_RDX, 8);
    62  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x20, X86_64_RCX, 8);
    63  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x28, X86_64_R8, 8);
    64  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x30, X86_64_R9, 8);
    65  
    66  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x40, X86_64_XMM0);
    67  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x50, X86_64_XMM1);
    68  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x60, X86_64_XMM2);
    69  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x70, X86_64_XMM3);
    70  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x80, X86_64_XMM4);
    71  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x90, X86_64_XMM5);
    72  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0xA0, X86_64_XMM6);
    73  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0xB0, X86_64_XMM7);
    74  
    75  	/* Now fill the arguments for the closure function */
    76  	/* the closure function is #1 */
    77  	x86_64_mov_reg_imm_size(buf, X86_64_RDI, (jit_nint)closure, 8);
    78  	/* the apply buff is #2 */
    79  	x86_64_mov_reg_reg_size(buf, X86_64_RSI, X86_64_RSP, 8);
    80  
    81  	/* Call the closure handling function */
    82  	offset = (jit_nint)func - ((jit_nint)buf + 5);
    83  	if((offset < jit_min_int) || (offset > jit_max_int))
    84  	{
    85  		/* offset is outside the 32 bit offset range */
    86  		/* so we have to do an indirect call */
    87  		/* We use R11 here because it's the only temporary caller saved */
    88  		/* register not used for argument passing. */
    89  		x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)func, 8);
    90  		x86_64_call_reg(buf, X86_64_R11);
    91  	}
    92  	else
    93  	{
    94  		x86_64_call_imm(buf, (jit_int)offset);
    95  	}
    96  
    97  	/* Pop the current stack frame */
    98  	x86_64_mov_reg_reg_size(buf, X86_64_RSP, X86_64_RBP, 8);
    99  	x86_64_pop_reg_size(buf, X86_64_RBP, 8);
   100  
   101  	/* Return from the closure */
   102  	x86_64_ret(buf);
   103  }
   104  
   105  void *_jit_create_redirector(unsigned char *buf, void *func,
   106  							 void *user_data, int abi)
   107  {
   108  	jit_nint offset;
   109  	void *start = (void *)buf;
   110  
   111  	/* Save all registers used for argument passing */
   112  	/* At this point RSP is not aligned on a 16 byte boundary because */
   113  	/* the return address is pushed on the stack. */
   114  	/* We need (7 * 8) + (8 * 16) bytes for the registers */
   115  	x86_64_sub_reg_imm_size(buf, X86_64_RSP, 0xB8, 8);
   116  
   117  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xB0, X86_64_RAX, 8);
   118  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xA8, X86_64_RDI, 8);
   119  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xA0, X86_64_RSI, 8);
   120  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x98, X86_64_RDX, 8);
   121  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x90, X86_64_RCX, 8);
   122  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x88, X86_64_R8, 8);
   123  	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x80, X86_64_R9, 8);
   124  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x70, X86_64_XMM0);
   125  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x60, X86_64_XMM1);
   126  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x50, X86_64_XMM2);
   127  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x40, X86_64_XMM3);
   128  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x30, X86_64_XMM4);
   129  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x20, X86_64_XMM5);
   130  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x10, X86_64_XMM6);
   131  	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x00, X86_64_XMM7);
   132  
   133  	/* Fill the pointer to the stack args */
   134  	x86_64_lea_membase_size(buf, X86_64_RDI, X86_64_RSP, 0xD0, 8);
   135  	x86_64_mov_regp_reg_size(buf, X86_64_RSP, X86_64_RDI, 8);
   136  
   137  	/* Load the user data argument */
   138  	x86_64_mov_reg_imm_size(buf, X86_64_RDI, (jit_nint)user_data, 8);
   139  
   140  	/* Call "func" (the pointer result will be in RAX) */
   141  	offset = (jit_nint)func - ((jit_nint)buf + 5);
   142  	if((offset < jit_min_int) || (offset > jit_max_int))
   143  	{
   144  		/* offset is outside the 32 bit offset range */
   145  		/* so we have to do an indirect call */
   146  		/* We use R11 here because it's the only temporary caller saved */
   147  		/* register not used for argument passing. */
   148  		x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)func, 8);
   149  		x86_64_call_reg(buf, X86_64_R11);
   150  	}
   151  	else
   152  	{
   153  		x86_64_call_imm(buf, (jit_int)offset);
   154  	}
   155  
   156  	/* store the returned address in R11 */
   157  	x86_64_mov_reg_reg_size(buf, X86_64_R11, X86_64_RAX, 8);
   158  
   159  	/* Restore the argument registers */
   160  	x86_64_mov_reg_membase_size(buf, X86_64_RAX, X86_64_RSP, 0xB0, 8);
   161  	x86_64_mov_reg_membase_size(buf, X86_64_RDI, X86_64_RSP, 0xA8, 8);
   162  	x86_64_mov_reg_membase_size(buf, X86_64_RSI, X86_64_RSP, 0xA0, 8);
   163  	x86_64_mov_reg_membase_size(buf, X86_64_RDX, X86_64_RSP, 0x98, 8);
   164  	x86_64_mov_reg_membase_size(buf, X86_64_RCX, X86_64_RSP, 0x90, 8);
   165  	x86_64_mov_reg_membase_size(buf, X86_64_R8, X86_64_RSP, 0x88, 8);
   166  	x86_64_mov_reg_membase_size(buf, X86_64_R9, X86_64_RSP, 0x80, 8);
   167  	x86_64_movaps_reg_membase(buf, X86_64_XMM0, X86_64_RSP, 0x70);
   168  	x86_64_movaps_reg_membase(buf, X86_64_XMM1, X86_64_RSP, 0x60);
   169  	x86_64_movaps_reg_membase(buf, X86_64_XMM2, X86_64_RSP, 0x50);
   170  	x86_64_movaps_reg_membase(buf, X86_64_XMM3, X86_64_RSP, 0x40);
   171  	x86_64_movaps_reg_membase(buf, X86_64_XMM4, X86_64_RSP, 0x30);
   172  	x86_64_movaps_reg_membase(buf, X86_64_XMM5, X86_64_RSP, 0x20);
   173  	x86_64_movaps_reg_membase(buf, X86_64_XMM6, X86_64_RSP, 0x10);
   174  	x86_64_movaps_reg_membase(buf, X86_64_XMM7, X86_64_RSP, 0x00);
   175  
   176  	/* Restore the stack pointer */
   177  	x86_64_add_reg_imm_size(buf, X86_64_RSP, 0xB8, 8);
   178  
   179  	/* Jump to the function that the redirector indicated */
   180  	x86_64_jmp_reg(buf, X86_64_R11);
   181  
   182  	/* Return the start of the buffer as the redirector entry point */
   183  	return start;
   184  }
   185  
   186  void *_jit_create_indirector(unsigned char *buf, void **entry)
   187  {
   188  	void *start = (void *)buf;
   189  
   190  	/* Jump to the entry point. */
   191  	if(((jit_nint)entry >= jit_min_int) && ((jit_nint)entry <= jit_max_int))
   192  	{
   193  		/* We are in the 32bit range so we can use the entry directly. */
   194  		x86_64_jmp_mem(buf, (jit_nint)entry);
   195  	}
   196  	else
   197  	{
   198  		jit_nint offset = (jit_nint)entry - ((jit_nint)buf + 6);
   199  
   200  		if((offset >= jit_min_int) && (offset <= jit_max_int))
   201  		{
   202  			/* We are in the 32bit range so we can use RIP relative addressing. */
   203  			x86_64_jmp_membase(buf, X86_64_RIP, offset);
   204  		}
   205  		else
   206  		{
   207  			/* offset is outside the 32 bit offset range */
   208  			/* so we have to do an indirect jump via register. */
   209  			x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)entry, 8);
   210  			x86_64_jmp_regp(buf, X86_64_R11);
   211  		}
   212  	}
   213  
   214  	return start;
   215  }
   216  
   217  void _jit_pad_buffer(unsigned char *buf, int len)
   218  {
   219  	while(len >= 6)
   220  	{
   221  		/* "leal 0(%esi), %esi" with 32-bit displacement */
   222  		*buf++ = (unsigned char)0x8D;
   223  		x86_address_byte(buf, 2, X86_ESI, X86_ESI);
   224  		x86_imm_emit32(buf, 0);
   225  		len -= 6;
   226  	}
   227  	if(len >= 3)
   228  	{
   229  		/* "leal 0(%esi), %esi" with 8-bit displacement */
   230  		*buf++ = (unsigned char)0x8D;
   231  		x86_address_byte(buf, 1, X86_ESI, X86_ESI);
   232  		x86_imm_emit8(buf, 0);
   233  		len -= 3;
   234  	}
   235  	if(len == 1)
   236  	{
   237  		/* Traditional x86 NOP */
   238  		x86_nop(buf);
   239  	}
   240  	else if(len == 2)
   241  	{
   242  		/* movl %esi, %esi */
   243  		x86_mov_reg_reg(buf, X86_ESI, X86_ESI, 4);
   244  	}
   245  }
   246  
   247  /*
   248   * Allcate the slot for a parameter passed on the stack.
   249   */
   250  static void
   251  _jit_alloc_param_slot(jit_param_passing_t *passing, _jit_param_t *param,
   252  					  jit_type_t type)
   253  {
   254  	jit_int size = jit_type_get_size(type);
   255  	jit_int alignment = jit_type_get_alignment(type);
   256  
   257  	/* Expand the size to a multiple of the stack slot size */
   258  	size = ROUND_STACK(size);
   259  
   260  	/* Expand the alignment to a multiple of the stack slot size */
   261  	/* We expect the alignment to be a power of two after this step */
   262  	alignment = ROUND_STACK(alignment);
   263  
   264  	/* Make sure the current offset is aligned propperly for the type */
   265  	if((passing->stack_size & (alignment -1)) != 0)
   266  	{
   267  		/* We need padding on the stack to fix the alignment constraint */
   268  		jit_int padding = passing->stack_size & (alignment -1);
   269  
   270  		/* Add the padding to the stack region */
   271  		passing->stack_size += padding;
   272  
   273  		/* record the number of pad words needed after pushing this arg */
   274  		param->stack_pad = STACK_SLOTS_USED(padding);
   275  	}
   276  	/* Record the offset of the parameter in the arg region. */
   277  	param->un.offset = passing->stack_size;
   278  
   279  	/* And increase the argument region used. */
   280  	passing->stack_size += size;
   281  }
   282  
   283  /*
   284   * Determine if a type corresponds to a structure or union.
   285   */
   286  static int
   287  is_struct_or_union(jit_type_t type)
   288  {
   289  	type = jit_type_normalize(type);
   290  	if(type)
   291  	{
   292  		if(type->kind == JIT_TYPE_STRUCT || type->kind == JIT_TYPE_UNION)
   293  		{
   294  			return 1;
   295  		}
   296  	}
   297  	return 0;
   298  }
   299  
   300  /*
   301   * Classify the argument type.
   302   * The type has to be in it's normalized form.
   303   */
   304  static int
   305  _jit_classify_arg(jit_type_t arg_type, int is_return)
   306  {
   307  	switch(arg_type->kind)
   308  	{
   309  		case JIT_TYPE_SBYTE:
   310  		case JIT_TYPE_UBYTE:
   311  		case JIT_TYPE_SHORT:
   312  		case JIT_TYPE_USHORT:
   313  		case JIT_TYPE_INT:
   314  		case JIT_TYPE_UINT:
   315  		case JIT_TYPE_NINT:
   316  		case JIT_TYPE_NUINT:
   317  		case JIT_TYPE_LONG:
   318  		case JIT_TYPE_ULONG:
   319  		case JIT_TYPE_SIGNATURE:
   320  		case JIT_TYPE_PTR:
   321  		{
   322  			return X86_64_ARG_INTEGER;
   323  		}
   324  		break;
   325  
   326  		case JIT_TYPE_FLOAT32:
   327  		case JIT_TYPE_FLOAT64:
   328  		{
   329  			return X86_64_ARG_SSE;
   330  		}
   331  		break;
   332  
   333  		case JIT_TYPE_NFLOAT:
   334  		{
   335  			/* we assume the nfloat type to be long double (80bit) */
   336  			if(is_return)
   337  			{
   338  				return X86_64_ARG_X87;
   339  			}
   340  			else
   341  			{
   342  				return X86_64_ARG_MEMORY;
   343  			}
   344  		}
   345  		break;
   346  
   347  		case JIT_TYPE_STRUCT:
   348  		case JIT_TYPE_UNION:
   349  		{
   350  			int size = jit_type_get_size(arg_type);
   351  
   352  			if(size > 16)
   353  			{
   354  				return X86_64_ARG_MEMORY;
   355  			}
   356  			else if(size <= 8)
   357  			{
   358  				return X86_64_ARG_INTEGER;
   359  			}
   360  			/* For structs and unions with sizes between 8 ant 16 bytes */
   361  			/* we have to look at the elements. */
   362  			/* TODO */
   363  		}
   364  	}
   365  	return X86_64_ARG_NO_CLASS;
   366  }
   367  
   368  /*
   369   * On X86_64 the alignment of native types matches their size.
   370   * This leads to the result that all types except nfloats and aggregates
   371   * (structs and unions) must start and end in an eightbyte (or the part
   372   * we are looking at).
   373   */
   374  static int
   375  _jit_classify_structpart(jit_type_t struct_type, unsigned int start,
   376  						 unsigned int start_offset, unsigned int end_offset)
   377  {
   378  	int arg_class = X86_64_ARG_NO_CLASS;
   379  	unsigned int num_fields = jit_type_num_fields(struct_type);
   380  	unsigned int current_field;
   381  	
   382  	for(current_field = 0; current_field < num_fields; ++current_field)
   383  	{
   384  		jit_nuint field_offset = jit_type_get_offset(struct_type,
   385  													 current_field);
   386  
   387  		if(field_offset <= end_offset)
   388  		{
   389  			/* The field starts at a place that's inerresting for us */
   390  			jit_type_t field_type = jit_type_get_field(struct_type,
   391  													   current_field);
   392  			jit_nuint field_size = jit_type_get_size(field_type); 
   393  
   394  			if(field_offset + field_size > start_offset)
   395  			{
   396  				/* The field is at least partially in the part we are */
   397  				/* looking at */
   398  				int arg_class2 = X86_64_ARG_NO_CLASS;
   399  
   400  				if(is_struct_or_union(field_type))
   401  				{
   402  					/* We have to check this struct recursively */
   403  					unsigned int current_start;
   404  					unsigned int nested_struct_start;
   405  					unsigned int nested_struct_end;
   406  
   407  					current_start = start + start_offset;
   408  					if(field_offset < current_start)
   409  					{
   410  						nested_struct_start = current_start - field_offset;
   411  					}
   412  					else
   413  					{
   414  						nested_struct_start = 0;
   415  					}
   416  					if(field_offset + field_size - 1 > end_offset)
   417  					{
   418  						/* The struct ends beyond the part we are looking at */
   419  						nested_struct_end = field_offset + field_size -
   420  												(nested_struct_start + 1);
   421  					}
   422  					else
   423  					{
   424  						nested_struct_end = field_size - 1;
   425  					}
   426  					arg_class2 = _jit_classify_structpart(field_type,
   427  														  start + field_offset,
   428  														  nested_struct_start,
   429  														  nested_struct_end);
   430  				}
   431  				else
   432  				{
   433  					if((start + start_offset) & (field_size - 1))
   434  					{
   435  						/* The field is misaligned */
   436  						return X86_64_ARG_MEMORY;
   437  					}
   438  					arg_class2 = _jit_classify_arg(field_type, 0);
   439  				}
   440  				if(arg_class == X86_64_ARG_NO_CLASS)
   441  				{
   442  					arg_class = arg_class2;
   443  				}
   444  				else if(arg_class != arg_class2)
   445  				{
   446  					if(arg_class == X86_64_ARG_MEMORY ||
   447  					   arg_class2 == X86_64_ARG_MEMORY)
   448  					{
   449  						arg_class = X86_64_ARG_MEMORY;
   450  					}
   451  					else if(arg_class == X86_64_ARG_INTEGER ||
   452  					   arg_class2 == X86_64_ARG_INTEGER)
   453  					{
   454  						arg_class = X86_64_ARG_INTEGER;
   455  					}
   456  					else if(arg_class == X86_64_ARG_X87 ||
   457  					   arg_class2 == X86_64_ARG_X87)
   458  					{
   459  						arg_class = X86_64_ARG_MEMORY;
   460  					}
   461  					else
   462  					{
   463  						arg_class = X86_64_ARG_SSE;
   464  					}
   465  				}
   466  			}
   467  		}
   468  	}
   469  	return arg_class;
   470  }
   471  
   472  int
   473  _jit_classify_struct(jit_param_passing_t *passing,
   474  					_jit_param_t *param, jit_type_t param_type)
   475  {
   476  	jit_nuint size = (jit_nuint)jit_type_get_size(param_type);
   477  
   478  	if(size <= 8)
   479  	{
   480  		int arg_class;
   481  	
   482  		arg_class = _jit_classify_structpart(param_type, 0, 0, size - 1);
   483  		if(arg_class == X86_64_ARG_NO_CLASS)
   484  		{
   485  			arg_class = X86_64_ARG_SSE;
   486  		}
   487  		if(arg_class == X86_64_ARG_INTEGER)
   488  		{
   489  			if(passing->word_index < passing->max_word_regs)
   490  			{
   491  				/* Set the arg class to the number of registers used */
   492  				param->arg_class = 1;
   493  
   494  				/* Set the first register to the register used */
   495  				param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
   496  				param->un.reg_info[0].value = param->value;
   497  				++(passing->word_index);
   498  			}
   499  			else
   500  			{
   501  				/* Set the arg class to stack */
   502  				param->arg_class = JIT_ARG_CLASS_STACK;
   503  
   504  				/* Allocate the slot in the arg passing frame */
   505  				_jit_alloc_param_slot(passing, param, param_type);
   506  			}			
   507  		}
   508  		else if(arg_class == X86_64_ARG_SSE)
   509  		{
   510  			if(passing->float_index < passing->max_float_regs)
   511  			{
   512  				/* Set the arg class to the number of registers used */
   513  				param->arg_class = 1;
   514  
   515  				/* Set the first register to the register used */
   516  				param->un.reg_info[0].reg =	passing->float_regs[passing->float_index];
   517  				param->un.reg_info[0].value = param->value;
   518  				++(passing->float_index);
   519  			}
   520  			else
   521  			{
   522  				/* Set the arg class to stack */
   523  				param->arg_class = JIT_ARG_CLASS_STACK;
   524  
   525  				/* Allocate the slot in the arg passing frame */
   526  				_jit_alloc_param_slot(passing, param, param_type);
   527  			}
   528  		}
   529  		else
   530  		{
   531  			/* Set the arg class to stack */
   532  			param->arg_class = JIT_ARG_CLASS_STACK;
   533  
   534  			/* Allocate the slot in the arg passing frame */
   535  			_jit_alloc_param_slot(passing, param, param_type);
   536  		}
   537  	}
   538  	else if(size <= 16)
   539  	{
   540  		int arg_class1;
   541  		int arg_class2;
   542  
   543  		arg_class1 = _jit_classify_structpart(param_type, 0, 0, 7);
   544  		arg_class2 = _jit_classify_structpart(param_type, 0, 8, size - 1);
   545  		if(arg_class1 == X86_64_ARG_NO_CLASS)
   546  		{
   547  			arg_class1 = X86_64_ARG_SSE;
   548  		}
   549  		if(arg_class2 == X86_64_ARG_NO_CLASS)
   550  		{
   551  			arg_class2 = X86_64_ARG_SSE;
   552  		}
   553  		if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE)
   554  		{
   555  			/* We use only one sse register in this case */
   556  			if(passing->float_index < passing->max_float_regs)
   557  			{
   558  				/* Set the arg class to the number of registers used */
   559  				param->arg_class = 1;
   560  
   561  				/* Set the first register to the register used */
   562  				param->un.reg_info[0].reg =	passing->float_regs[passing->float_index];
   563  				param->un.reg_info[0].value = param->value;
   564  				++(passing->float_index);
   565  			}
   566  			else
   567  			{
   568  				/* Set the arg class to stack */
   569  				param->arg_class = JIT_ARG_CLASS_STACK;
   570  
   571  				/* Allocate the slot in the arg passing frame */
   572  				_jit_alloc_param_slot(passing, param, param_type);
   573  			}
   574  		}
   575  		else if(arg_class1 == X86_64_ARG_MEMORY ||
   576  				arg_class2 == X86_64_ARG_MEMORY)
   577  		{
   578  			/* Set the arg class to stack */
   579  			param->arg_class = JIT_ARG_CLASS_STACK;
   580  
   581  			/* Allocate the slot in the arg passing frame */
   582  			_jit_alloc_param_slot(passing, param, param_type);
   583  		}
   584  		else if(arg_class1 == X86_64_ARG_INTEGER &&
   585  				arg_class2 == X86_64_ARG_INTEGER)
   586  		{
   587  			/* We need two general purpose registers in this case */
   588  			if((passing->word_index + 1) < passing->max_word_regs)
   589  			{
   590  				/* Set the arg class to the number of registers used */
   591  				param->arg_class = 2;
   592  
   593  				/* Assign the registers */
   594  				param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
   595  				++(passing->word_index);
   596  				param->un.reg_info[1].reg = passing->word_regs[passing->word_index];
   597  				++(passing->word_index);
   598  			}
   599  			else
   600  			{
   601  				/* Set the arg class to stack */
   602  				param->arg_class = JIT_ARG_CLASS_STACK;
   603  
   604  				/* Allocate the slot in the arg passing frame */
   605  				_jit_alloc_param_slot(passing, param, param_type);
   606  			}			
   607  		}
   608  		else
   609  		{
   610  			/* We need one xmm and one general purpose register */
   611  			if((passing->word_index < passing->max_word_regs) &&
   612  			   (passing->float_index < passing->max_float_regs))
   613  			{
   614  				/* Set the arg class to the number of registers used */
   615  				param->arg_class = 2;
   616  
   617  				if(arg_class1 == X86_64_ARG_INTEGER)
   618  				{
   619  					param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
   620  					++(passing->word_index);
   621  					param->un.reg_info[1].reg =	passing->float_regs[passing->float_index];
   622  					++(passing->float_index);
   623  				}
   624  				else
   625  				{
   626  					param->un.reg_info[0].reg =	passing->float_regs[passing->float_index];
   627  					++(passing->float_index);
   628  					param->un.reg_info[1].reg = passing->word_regs[passing->word_index];
   629  					++(passing->word_index);
   630  				}
   631  			}
   632  			else
   633  			{
   634  				/* Set the arg class to stack */
   635  				param->arg_class = JIT_ARG_CLASS_STACK;
   636  
   637  				/* Allocate the slot in the arg passing frame */
   638  				_jit_alloc_param_slot(passing, param, param_type);
   639  			}
   640  		}
   641  	}
   642  	else
   643  	{
   644  		/* Set the arg class to stack */
   645  		param->arg_class = JIT_ARG_CLASS_STACK;
   646  
   647  		/* Allocate the slot in the arg passing frame */
   648  		_jit_alloc_param_slot(passing, param, param_type);
   649  	}
   650  	return 1;
   651  }
   652  
   653  int
   654  _jit_classify_param(jit_param_passing_t *passing,
   655  					_jit_param_t *param, jit_type_t param_type)
   656  {
   657  	if(is_struct_or_union(param_type))
   658  	{
   659  		return _jit_classify_struct(passing, param, param_type);
   660  	}
   661  	else
   662  	{
   663  		int arg_class;
   664  
   665  		arg_class = _jit_classify_arg(param_type, 0);
   666  
   667  		switch(arg_class)
   668  		{
   669  			case X86_64_ARG_INTEGER:
   670  			{
   671  				if(passing->word_index < passing->max_word_regs)
   672  				{
   673  					/* Set the arg class to the number of registers used */
   674  					param->arg_class = 1;
   675  
   676  					/* Set the first register to the register used */
   677  					param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
   678  					param->un.reg_info[0].value = param->value;
   679  					++(passing->word_index);
   680  				}
   681  				else
   682  				{
   683  					/* Set the arg class to stack */
   684  					param->arg_class = JIT_ARG_CLASS_STACK;
   685  
   686  					/* Allocate the slot in the arg passing frame */
   687  					_jit_alloc_param_slot(passing, param, param_type);
   688  				}
   689  			}
   690  			break;
   691  
   692  			case X86_64_ARG_SSE:
   693  			{
   694  				if(passing->float_index < passing->max_float_regs)
   695  				{
   696  					/* Set the arg class to the number of registers used */
   697  					param->arg_class = 1;
   698  
   699  					/* Set the first register to the register used */
   700  					param->un.reg_info[0].reg =	passing->float_regs[passing->float_index];
   701  					param->un.reg_info[0].value = param->value;
   702  					++(passing->float_index);
   703  				}
   704  				else
   705  				{
   706  					/* Set the arg class to stack */
   707  					param->arg_class = JIT_ARG_CLASS_STACK;
   708  
   709  					/* Allocate the slot in the arg passing frame */
   710  					_jit_alloc_param_slot(passing, param, param_type);
   711  				}
   712  			}
   713  			break;
   714  
   715  			case X86_64_ARG_MEMORY:
   716  			{
   717  				/* Set the arg class to stack */
   718  				param->arg_class = JIT_ARG_CLASS_STACK;
   719  
   720  				/* Allocate the slot in the arg passing frame */
   721  				_jit_alloc_param_slot(passing, param, param_type);
   722  			}
   723  			break;
   724  		}
   725  	}
   726  	return 1;
   727  }
   728  
   729  void
   730  _jit_builtin_apply_add_struct(jit_apply_builder *builder,
   731  							  void *value,
   732  							  jit_type_t struct_type)
   733  {
   734  	unsigned int size = jit_type_get_size(struct_type);
   735  
   736  	if(size <= 16)
   737  	{
   738  		if(size <= 8)
   739  		{
   740  			int arg_class;
   741  	
   742  			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
   743  			if(arg_class == X86_64_ARG_NO_CLASS)
   744  			{
   745  				arg_class = X86_64_ARG_SSE;
   746  			}
   747  			if((arg_class == X86_64_ARG_INTEGER) &&
   748  			   (builder->word_used < JIT_APPLY_NUM_WORD_REGS))
   749  			{
   750  				/* The struct is passed in a general purpose register */
   751  				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
   752  												value, size);
   753  				++(builder->word_used);
   754  			}
   755  			else if((arg_class == X86_64_ARG_SSE) &&
   756  					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
   757  			{
   758  				/* The struct is passed in one sse register */
   759  				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
   760  												value, size);
   761  				++(builder->float_used);
   762  			}
   763  			else
   764  			{
   765  				unsigned int align = jit_type_get_alignment(struct_type);
   766  
   767  				jit_apply_builder_add_struct(builder, value, size, align);
   768  			}
   769  		}
   770  		else
   771  		{
   772  			int arg_class1;
   773  			int arg_class2;
   774  
   775  			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
   776  			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
   777  			if(arg_class1 == X86_64_ARG_NO_CLASS)
   778  			{
   779  				arg_class1 = X86_64_ARG_SSE;
   780  			}
   781  			if(arg_class2 == X86_64_ARG_NO_CLASS)
   782  			{
   783  				arg_class2 = X86_64_ARG_SSE;
   784  			}
   785  			if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE &&
   786  			   (builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
   787  			{
   788  				/* The struct is passed in one sse register */
   789  				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
   790  											value, size);
   791  				++(builder->float_used);
   792  			}
   793  			else if(arg_class1 == X86_64_ARG_INTEGER &&
   794  					arg_class2 == X86_64_ARG_INTEGER &&
   795  					(builder->word_used < (JIT_APPLY_NUM_WORD_REGS + 1)))
   796  			{
   797  				/* The struct is passed in two general purpose registers */
   798  				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
   799  											value, size);
   800  				(builder->word_used) += 2;
   801  			}
   802  			else if(arg_class1 == X86_64_ARG_INTEGER &&
   803  					arg_class2 == X86_64_ARG_SSE &&
   804  					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
   805  					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
   806  			{
   807  				/* The first eightbyte is passed in a general purpose */
   808  				/* register and the second eightbyte in a sse register */
   809  				builder->apply_args->word_regs[builder->word_used] =
   810  											((jit_nint *)value)[0];
   811  				++(builder->word_used);
   812  				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
   813  											((char *)value) + 8, size - 8);
   814  				++(builder->float_used);
   815  			}
   816  			else if(arg_class1 == X86_64_ARG_SSE &&
   817  					arg_class2 == X86_64_ARG_INTEGER &&
   818  					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
   819  					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
   820  			{
   821  				/* The first eightbyte is passed in a sse register and */
   822  				/* the second eightbyte in a general purpose  register */
   823  				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
   824  											value, 8);
   825  				++(builder->float_used);
   826  				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
   827  											((char *)value) + 8, size - 8);
   828  				++(builder->word_used);
   829  			}
   830  			else
   831  			{
   832  				unsigned int align = jit_type_get_alignment(struct_type);
   833  
   834  				jit_apply_builder_add_struct(builder, value, size, align);
   835  			}
   836  		}
   837  	}
   838  	else
   839  	{
   840  		unsigned int align = jit_type_get_alignment(struct_type);
   841  
   842  		jit_apply_builder_add_struct(builder, value, size, align);
   843  	}
   844  }
   845  
   846  void
   847  _jit_builtin_apply_get_struct(jit_apply_builder *builder,
   848  							  void *value,
   849  							  jit_type_t struct_type)
   850  {
   851  	unsigned int size = jit_type_get_size(struct_type);
   852  
   853  	if(size <= 16)
   854  	{
   855  		if(size <= 8)
   856  		{
   857  			int arg_class;
   858  	
   859  			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
   860  			if(arg_class == X86_64_ARG_NO_CLASS)
   861  			{
   862  				arg_class = X86_64_ARG_SSE;
   863  			}
   864  			if((arg_class == X86_64_ARG_INTEGER) &&
   865  			   (builder->word_used < JIT_APPLY_NUM_WORD_REGS))
   866  			{
   867  				/* The struct is passed in a general purpose register */
   868  				jit_memcpy(value,
   869  						   &(builder->apply_args->word_regs[builder->word_used]),
   870  						   size);
   871  				++(builder->word_used);
   872  			}
   873  			else if((arg_class == X86_64_ARG_SSE) &&
   874  					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
   875  			{
   876  				/* The struct is passed in one sse register */
   877  				jit_memcpy(value,
   878  						   &(builder->apply_args->float_regs[builder->float_used]),
   879  						   size);
   880  				++(builder->float_used);
   881  			}
   882  			else
   883  			{
   884  				/* TODO: always load the value from stack */
   885  				unsigned int align = jit_type_get_alignment(struct_type);
   886  
   887  				jit_apply_parser_get_struct(builder, size, align, value);
   888  			}
   889  		}
   890  		else
   891  		{
   892  			int arg_class1;
   893  			int arg_class2;
   894  
   895  			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
   896  			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
   897  			if(arg_class1 == X86_64_ARG_NO_CLASS)
   898  			{
   899  				arg_class1 = X86_64_ARG_SSE;
   900  			}
   901  			if(arg_class2 == X86_64_ARG_NO_CLASS)
   902  			{
   903  				arg_class2 = X86_64_ARG_SSE;
   904  			}
   905  			if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE &&
   906  			   (builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
   907  			{
   908  				/* The struct is passed in one sse register */
   909  				jit_memcpy(value,
   910  						   &(builder->apply_args->float_regs[builder->float_used]),
   911  						   size);
   912  				++(builder->float_used);
   913  			}
   914  			else if(arg_class1 == X86_64_ARG_INTEGER &&
   915  					arg_class2 == X86_64_ARG_INTEGER &&
   916  					(builder->word_used < (JIT_APPLY_NUM_WORD_REGS + 1)))
   917  			{
   918  				/* The struct is passed in two general purpose registers */
   919  				jit_memcpy(value,
   920  						   &(builder->apply_args->word_regs[builder->word_used]),
   921  						   size);
   922  				(builder->word_used) += 2;
   923  			}
   924  			else if(arg_class1 == X86_64_ARG_INTEGER &&
   925  					arg_class2 == X86_64_ARG_SSE &&
   926  					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
   927  					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
   928  			{
   929  				/* The first eightbyte is passed in a general purpose */
   930  				/* register and the second eightbyte in a sse register */
   931  				((jit_nint *)value)[0] =
   932  					builder->apply_args->word_regs[builder->word_used];
   933  				++(builder->word_used);
   934  
   935  				jit_memcpy(((char *)value) + 8,
   936  						   &(builder->apply_args->float_regs[builder->float_used]),
   937  						   size - 8);
   938  				++(builder->float_used);
   939  			}
   940  			else if(arg_class1 == X86_64_ARG_SSE &&
   941  					arg_class2 == X86_64_ARG_INTEGER &&
   942  					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
   943  					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
   944  			{
   945  				/* The first eightbyte is passed in a sse register and */
   946  				/* the second eightbyte in a general purpose  register */
   947  				jit_memcpy(value,
   948  						   &(builder->apply_args->float_regs[builder->float_used]),
   949  						   8);
   950  				++(builder->float_used);
   951  
   952  				jit_memcpy(((char *)value) + 8,
   953  						   &(builder->apply_args->word_regs[builder->word_used]),
   954  						   size - 8);
   955  				++(builder->word_used);
   956  			}
   957  			else
   958  			{
   959  				/* TODO: always load the value from stack */
   960  				unsigned int align = jit_type_get_alignment(struct_type);
   961  
   962  				jit_apply_parser_get_struct(builder, size, align, value);
   963  			}
   964  		}
   965  	}
   966  	else
   967  	{
   968  		/* TODO: always load the value from stack */
   969  		unsigned int align = jit_type_get_alignment(struct_type);
   970  
   971  		jit_apply_parser_get_struct(builder, size, align, value);
   972  	}
   973  }
   974  
   975  void
   976  _jit_builtin_apply_get_struct_return(jit_apply_builder *builder,
   977  									 void *return_value,
   978  									 jit_apply_return *apply_return,
   979  									 jit_type_t struct_type)
   980  {
   981  	unsigned int size = jit_type_get_size(struct_type);
   982  
   983  	if(size <= 16)
   984  	{
   985  		if(size <= 8)
   986  		{
   987  			int arg_class;
   988  	
   989  			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
   990  			if(arg_class == X86_64_ARG_NO_CLASS)
   991  			{
   992  				arg_class = X86_64_ARG_SSE;
   993  			}
   994  			if(arg_class == X86_64_ARG_INTEGER)
   995  			{
   996  				/* The struct is returned in %rax */
   997  				jit_memcpy(return_value, (void *)apply_return, size);
   998  				return;
   999  			}
  1000  			else if(arg_class == X86_64_ARG_SSE)
  1001  			{
  1002  				/* The struct is returned in %xmm0 */
  1003  				jit_memcpy(return_value,
  1004  						   &(((jit_ubyte *)apply_return)[16]), size);
  1005  				return;
  1006  			}
  1007  		}
  1008  		else
  1009  		{
  1010  			int arg_class1;
  1011  			int arg_class2;
  1012  
  1013  			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
  1014  			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
  1015  			if(arg_class1 == X86_64_ARG_NO_CLASS)
  1016  			{
  1017  				arg_class1 = X86_64_ARG_SSE;
  1018  			}
  1019  			if(arg_class2 == X86_64_ARG_NO_CLASS)
  1020  			{
  1021  				arg_class2 = X86_64_ARG_SSE;
  1022  			}
  1023  			if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE)
  1024  			{
  1025  				/* The struct is returned in %xmm0 */
  1026  				jit_memcpy(return_value,
  1027  						   &(((jit_ubyte *)apply_return)[16]), size);
  1028  				return;
  1029  			}
  1030  			else if(arg_class1 == X86_64_ARG_INTEGER &&
  1031  					arg_class2 == X86_64_ARG_INTEGER)
  1032  			{
  1033  				/* The struct is returned in %rax and %rdx */
  1034  				jit_memcpy(return_value, (void *)apply_return, size);
  1035  				return;
  1036  			}
  1037  			else if(arg_class1 == X86_64_ARG_INTEGER &&
  1038  					arg_class2 == X86_64_ARG_SSE)
  1039  			{
  1040  				/* The first eightbyte is returned in %rax and the second */
  1041  				/* eightbyte in %xmm0 */
  1042  				((jit_nint *)return_value)[0] =
  1043  					*(jit_nint *)apply_return;
  1044  
  1045  				jit_memcpy(((char *)return_value) + 8,
  1046  						   &(((jit_ubyte *)apply_return)[16]), size - 8);
  1047  				return;
  1048  			}
  1049  			else if(arg_class1 == X86_64_ARG_SSE &&
  1050  					arg_class2 == X86_64_ARG_INTEGER)
  1051  			{
  1052  				/* The first eightbyte is returned in %xmm0 and the second */
  1053  				/* eightbyte in %rax */
  1054  				jit_memcpy(return_value,
  1055  						   &(((jit_ubyte *)apply_return)[16]), 8);
  1056  
  1057  				jit_memcpy(((char *)return_value) + 8,
  1058  						   (void *)apply_return, size - 8);
  1059  				return;
  1060  			}
  1061  		}
  1062  	}
  1063  	/* All other cases are returned via return_ptr */
  1064  	if(builder->struct_return != return_value)
  1065  	{
  1066  		jit_memcpy(return_value, (builder)->struct_return, size);
  1067  	}
  1068  }
  1069  
  1070  #endif /* x86-64 */