github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-apply-x86-64.h (about)

     1  /*
     2   * jit-apply-x86-64.h - Special definitions for x86-64 function application.
     3   *
     4   * Copyright (C) 2004  Southern Storm Software, Pty Ltd.
     5   *
     6   * This file is part of the libjit library.
     7   *
     8   * The libjit library is free software: you can redistribute it and/or
     9   * modify it under the terms of the GNU Lesser General Public License
    10   * as published by the Free Software Foundation, either version 2.1 of
    11   * the License, or (at your option) any later version.
    12   *
    13   * The libjit library is distributed in the hope that it will be useful,
    14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    16   * Lesser General Public License for more details.
    17   *
    18   * You should have received a copy of the GNU Lesser General Public
    19   * License along with the libjit library.  If not, see
    20   * <http://www.gnu.org/licenses/>.
    21   */
    22  
    23  #ifndef	_JIT_APPLY_X86_64_H
    24  #define	_JIT_APPLY_X86_64_H
    25  
    26  #include <jit/jit-common.h>
    27  
/*
 * Flag that a parameter is passed on the stack.
 *
 * 0xFFFF is the largest value that fits in 16 bits.
 * NOTE(review): presumably stored in the "arg_class" member of
 * _jit_param_t (a jit_ushort) in place of a register class - confirm
 * against the classification code.
 */
#define JIT_ARG_CLASS_STACK	0xFFFF
    32  
    33  /*
    34   * Define the way the parameter is passed to a specific function
    35   */
/*
 * Pairs a register number with a value.  _jit_param_t keeps an array of
 * four of these in its "un.reg_info" member.
 */
typedef struct
{
	int reg;			/* NOTE(review): presumably the register that */
						/* carries this part of the parameter - confirm */
	jit_value_t value;	/* NOTE(review): presumably the value placed in */
						/* "reg" - confirm */
} _jit_structpassing_t;
    41  
/*
 * Description of a single parameter: its value, its argument class and
 * either per-register information or a single offset.
 */
typedef struct
{
	jit_value_t value;			/* The parameter value */
	jit_ushort arg_class;		/* NOTE(review): presumably */
								/* JIT_ARG_CLASS_STACK or a register */
								/* class/count - confirm */
	jit_ushort stack_pad;		/* Number of stack words needed for padding */
	union
	{
		_jit_structpassing_t reg_info[4];	/* Up to four register/value */
											/* pairs */
		jit_int offset;			/* NOTE(review): presumably the stack */
								/* offset used when the parameter is */
								/* passed on the stack - confirm */
	} un;
} _jit_param_t;
    53  
    54  /*
    55   * Structure that is used to help with parameter passing.
    56   */
typedef struct
{
	int				stack_size;			/* Number of bytes needed on the */
										/* stack for parameter passing */
	int				stack_pad;			/* Number of stack words we have */
										/* to push before pushing the */
										/* parameters for keeping the stack */
										/* aligned */
	unsigned int	word_index;			/* Number of word registers */
										/* allocated */
	unsigned int	max_word_regs;		/* Number of word registers */
										/* available for parameter passing */
	const int	   *word_regs;			/* NOTE(review): presumably the table */
										/* of word registers, max_word_regs */
										/* entries long - confirm */
	unsigned int	float_index;		/* NOTE(review): presumably the */
										/* number of float registers */
										/* allocated, mirroring word_index */
	unsigned int	max_float_regs;		/* NOTE(review): presumably the */
										/* number of float registers */
										/* available, mirroring */
										/* max_word_regs */
	const int	   *float_regs;			/* NOTE(review): presumably the table */
										/* of float registers - confirm */
	_jit_param_t   *params;				/* Parameter descriptors; the element */
										/* count is not stored in this */
										/* structure */

} jit_param_passing_t;
    76  
/*
 * Determine how a parameter is passed.
 *
 *	passing		parameter passing state (a jit_param_passing_t)
 *	param		descriptor of the parameter being classified
 *	param_type	type of the parameter
 *
 * NOTE(review): the definition is not part of this header.  Presumably it
 * fills in "param" and advances the register and stack counters held in
 * "passing"; the meaning of the int result is not visible here - confirm
 * against the implementation.
 */
int
_jit_classify_param(jit_param_passing_t *passing,
					_jit_param_t *param, jit_type_t param_type);
    83  
/*
 * Determine how a struct type is passed.
 *
 *	passing		parameter passing state (a jit_param_passing_t)
 *	param		descriptor of the struct parameter being classified
 *	param_type	type of the struct parameter
 *
 * NOTE(review): the definition is not part of this header, so the meaning
 * of the int result and the exact fields written in "param" are not
 * visible here - confirm against the implementation.
 */
int
_jit_classify_struct(jit_param_passing_t *passing,
					_jit_param_t *param, jit_type_t param_type);
    90  
/*
 * We handle struct passing ourselves.
 * NOTE(review): presumably tested by the generic apply code to skip its
 * default struct-argument handling - confirm at the point of use.
 */
#define HAVE_JIT_BUILTIN_APPLY_STRUCT 1

/*
 * We handle struct returning ourselves.
 * NOTE(review): presumably tested by the generic apply code to skip its
 * default struct-return handling - confirm at the point of use.
 */
#define HAVE_JIT_BUILTIN_APPLY_STRUCT_RETURN 1
   100  
   101  /*
   102   * The granularity of the stack
   103   */
   104  #define STACK_SLOT_SIZE	sizeof(void *)
   105  
   106  /*
   107   * Get he number of complete stack slots used
   108   */
   109  #define STACK_SLOTS_USED(size) ((size) >> 3)
   110  
   111  /*
   112   * Round a size up to a multiple of the stack word size.
   113   */
   114  #define	ROUND_STACK(size)	\
   115  		(((size) + (STACK_SLOT_SIZE - 1)) & ~(STACK_SLOT_SIZE - 1))
   116  
   117  /*
   118   * The "__builtin_apply" functionality in gcc orders the registers
   119   * in a strange way, which makes it difficult to use.  Our replacement
   120   * apply structure is laid out in the following order:
   121   *
   122   *		stack pointer
   123   *		%rdi, %rsi, %rdx, %rcx, %r8, %r9
   124   *		64-bit pad word
   125   *		%xmm0-%xmm7
   126   *
   127   * The total size of the apply structure is 192 bytes.  The return structure
   128   * is laid out as follows:
   129   *
   130   *		%rax, %rdx
   131   *		%xmm0
   132   *		%st0
   133   *
   134   * The total size of the return structure is 48 bytes.
   135   */
   136  
   137  #if defined(__GNUC__)
   138  
   139  #ifndef	JIT_MEMCPY
   140  # if defined(__APPLE__) && defined(__MACH__)
   141  #  define JIT_MEMCPY "_jit_memcpy"
   142  # else
   143  #  define JIT_MEMCPY "jit_memcpy@PLT"
   144  # endif
   145  #endif
   146  
   147  /*
   148   * We have to add all registers not saved by the caller to the clobber list
   149   * and not only the registers used for parameter passing because we call
   150   * arbitrary functions.
   151   * Maybe we should add the mmx* registers too?
   152   */
   153  #define	jit_builtin_apply(func,args,size,return_float,return_buf)	\
   154  		do { \
   155  			void *__func = (void *)(func); \
   156  			void *__args = (void *)(args); \
   157  			long __size = (((long)(size) + (long)0xf) & ~(long)0xf); \
   158  			void *__return_buf = alloca(64); \
   159  			(return_buf) = __return_buf; \
   160  			__asm__ ( \
   161  				"movq %1, %%rax\n\t" \
   162  				"movq (%%rax), %%rsi\n\t" \
   163  				"movq %2, %%rdx\n\t" \
   164  				"subq %%rdx, %%rsp\n\t" \
   165  				"movq %%rsp, %%rdi\n\t" \
   166  				"callq " JIT_MEMCPY "\n\t" \
   167  				"movq %1, %%rax\n\t" \
   168  				"movq 0x08(%%rax), %%rdi\n\t" \
   169  				"movq 0x10(%%rax), %%rsi\n\t" \
   170  				"movq 0x18(%%rax), %%rdx\n\t" \
   171  				"movq 0x20(%%rax), %%rcx\n\t" \
   172  				"movq 0x28(%%rax), %%r8\n\t" \
   173  				"movq 0x30(%%rax), %%r9\n\t" \
   174  				"movaps 0x40(%%rax), %%xmm0\n\t" \
   175  				"movaps 0x50(%%rax), %%xmm1\n\t" \
   176  				"movaps 0x60(%%rax), %%xmm2\n\t" \
   177  				"movaps 0x70(%%rax), %%xmm3\n\t" \
   178  				"movaps 0x80(%%rax), %%xmm4\n\t" \
   179  				"movaps 0x90(%%rax), %%xmm5\n\t" \
   180  				"movaps 0xA0(%%rax), %%xmm6\n\t" \
   181  				"movaps 0xB0(%%rax), %%xmm7\n\t" \
   182  				"movq %0, %%r11\n\t" \
   183  				"movl $8, %%eax\n\t" \
   184  				"callq *%%r11\n\t" \
   185  				"movq %3, %%rcx\n\t" \
   186  				"movq %%rax, (%%rcx)\n\t" \
   187  				"movq %%rdx, 0x08(%%rcx)\n\t" \
   188  				"movaps %%xmm0, 0x10(%%rcx)\n\t" \
   189  				"movq %2, %%rdx\n\t" \
   190  				"addq %%rdx, %%rsp\n\t" \
   191  				: : "m"(__func), "m"(__args), "m"(__size), "m"(__return_buf) \
   192  				: "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", \
   193  				  "r10", "r11", \
   194  				  "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", \
   195  				  "xmm5", "xmm6", "xmm7" \
   196  			); \
   197  			if((return_float)) \
   198  			{ \
   199  				__asm__ ( \
   200  					"movq %0, %%rax\n\t" \
   201  					"fstpt 0x20(%%rax)\n\t" \
   202  					: : "m"(__return_buf) \
   203  					: "rax", "st" \
   204  				); \
   205  			} \
   206  		} while (0)
   207  
   208  #define	jit_builtin_apply_args(type,args)	\
   209  		do { \
   210  			void *__args = alloca(192); \
   211  			__asm__ ( \
   212  				"pushq %%rdi\n\t" \
   213  				"leaq 16(%%rbp), %%rdi\n\t" \
   214  				"movq %0, %%rax\n\t" \
   215  				"movq %%rdi, (%%rax)\n\t" \
   216  				"popq %%rdi\n\t" \
   217  				"movq %%rdi, 0x08(%%rax)\n\t" \
   218  				"movq %%rsi, 0x10(%%rax)\n\t" \
   219  				"movq %%rdx, 0x18(%%rax)\n\t" \
   220  				"movq %%rcx, 0x20(%%rax)\n\t" \
   221  				"movq %%r8, 0x28(%%rax)\n\t" \
   222  				"movq %%r9, 0x30(%%rax)\n\t" \
   223  				"movaps %%xmm0, 0x40(%%rax)\n\t" \
   224  				"movaps %%xmm1, 0x50(%%rax)\n\t" \
   225  				"movaps %%xmm2, 0x60(%%rax)\n\t" \
   226  				"movaps %%xmm3, 0x70(%%rax)\n\t" \
   227  				"movaps %%xmm4, 0x80(%%rax)\n\t" \
   228  				"movaps %%xmm5, 0x90(%%rax)\n\t" \
   229  				"movaps %%xmm6, 0xA0(%%rax)\n\t" \
   230  				"movaps %%xmm7, 0xB0(%%rax)\n\t" \
   231  				: : "m"(__args) \
   232  				: "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", \
   233  				  "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", \
   234  				  "xmm5", "xmm6", "xmm7" \
   235  			); \
   236  			(args) = (type)__args; \
   237  		} while (0)
   238  
/*
 * Reload the return registers from "return_buf" and return from the
 * enclosing function.
 *
 * %rax is loaded from offset 0x00, %rdx from 0x08 and %xmm0 from 0x10 of
 * the buffer, with %rcx as the scratch pointer.  The movaps load needs
 * offset 0x10 of the buffer to be 16-byte aligned.
 *
 * NOTE(review): the bare "return;" relies on the compiler leaving the
 * freshly loaded registers untouched between the asm statement and the
 * function epilogue.
 */
#define	jit_builtin_return_int(return_buf)	\
		do { \
			__asm__ ( \
				"lea %0, %%rcx\n\t" \
				"movq (%%rcx), %%rax\n\t" \
				"movq 0x08(%%rcx), %%rdx\n\t" \
				"movaps 0x10(%%rcx), %%xmm0\n\t" \
				: : "m"(*(return_buf)) \
				: "rax", "rcx", "rdx", "xmm0" \
			); \
			return; \
		} while (0)
   251  
/*
 * Reload %xmm0 from offset 0x10 of "return_buf" and return from the
 * enclosing function.  %rcx is used as the scratch pointer.
 *
 * All 16 bytes of %xmm0 are restored with movaps, so offset 0x10 of the
 * buffer must be 16-byte aligned.  "st" is listed as clobbered although
 * no x87 instruction is issued here.
 *
 * NOTE(review): the bare "return;" relies on the compiler leaving %xmm0
 * untouched between the asm statement and the function epilogue.
 */
#define	jit_builtin_return_float(return_buf)	\
		do { \
			__asm__ ( \
				"lea %0, %%rcx\n\t" \
				"movaps 0x10(%%rcx), %%xmm0\n\t" \
				: : "m"(*(return_buf)) \
				: "rcx", "xmm0", "st" \
			); \
			return; \
		} while (0)
   262  
/*
 * Reload %xmm0 from offset 0x10 of "return_buf" and return from the
 * enclosing function.  The instruction sequence is the same as in
 * jit_builtin_return_float: the whole 16-byte register is restored, so
 * no distinction between float and double is needed.
 *
 * NOTE(review): the bare "return;" relies on the compiler leaving %xmm0
 * untouched between the asm statement and the function epilogue.
 */
#define	jit_builtin_return_double(return_buf)	\
		do { \
			__asm__ ( \
				"lea %0, %%rcx\n\t" \
				"movaps 0x10(%%rcx), %%xmm0\n\t" \
				: : "m"(*(return_buf)) \
				: "rcx", "xmm0", "st" \
			); \
			return; \
		} while (0)
   273  
/*
 * Push the 80-bit value stored at offset 0x20 of "return_buf" onto the
 * x87 stack with fldt and return from the enclosing function.  %rcx is
 * used as the scratch pointer.  "xmm0" is listed as clobbered although
 * it is not written here.
 *
 * NOTE(review): the bare "return;" relies on the compiler leaving the
 * x87 stack untouched between the asm statement and the function
 * epilogue.
 */
#define	jit_builtin_return_nfloat(return_buf)	\
		do { \
			__asm__ ( \
				"lea %0, %%rcx\n\t" \
				"fldt 0x20(%%rcx)\n\t" \
				: : "m"(*(return_buf)) \
				: "rcx", "xmm0", "st" \
			); \
			return; \
		} while (0)
   284  
/*
 * Expands to an empty statement; both arguments are ignored.
 * NOTE(review): presumably a no-op because struct returns are handled
 * elsewhere (see HAVE_JIT_BUILTIN_APPLY_STRUCT_RETURN) - confirm.
 */
#define jit_builtin_return_struct(return_buf, type) \
		do { \
		} while (0)
   288  
   289  #endif /* GNUC */
   290  
/*
 * The maximum number of bytes that are needed to represent a closure
 * (0x90 = 144 bytes), and the alignment to use for the closure
 * (0x20 = 32 bytes).
 */
#define	jit_closure_size		0x90
#define	jit_closure_align		0x20
   297  
/*
 * The number of bytes that are needed for a redirector stub
 * (0x100 = 256 bytes).
 * This includes any extra bytes that are needed for alignment.
 */
#define	jit_redirector_size		0x100
   303  
/*
 * The number of bytes that are needed for an indirector stub
 * (0x10 = 16 bytes).
 * This includes any extra bytes that are needed for alignment.
 */
#define	jit_indirector_size		0x10
   309  
   310  
   311  #endif	/* _JIT_APPLY_X86_64_H */