github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libffi/src/x86/darwin64.S (about)

     1  /* -----------------------------------------------------------------------
     2     darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
     3  	        Copyright (c) 2008 Red Hat, Inc.
     4     derived from unix64.S
     5  
     6     x86-64 Foreign Function Interface for Darwin.
     7  
     8     Permission is hereby granted, free of charge, to any person obtaining
     9     a copy of this software and associated documentation files (the
    10     ``Software''), to deal in the Software without restriction, including
    11     without limitation the rights to use, copy, modify, merge, publish,
    12     distribute, sublicense, and/or sell copies of the Software, and to
    13     permit persons to whom the Software is furnished to do so, subject to
    14     the following conditions:
    15  
    16     The above copyright notice and this permission notice shall be included
    17     in all copies or substantial portions of the Software.
    18  
    19     THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
    20     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    21     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    22     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
    23     OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
    24     ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    25     OTHER DEALINGS IN THE SOFTWARE.
    26     ----------------------------------------------------------------------- */
    27  
    28  #ifdef __x86_64__
    29  #define LIBFFI_ASM
    30  #include <fficonfig.h>
    31  #include <ffi.h>
    32  
    33  	.file "darwin64.S"
    34  .text
    35  
    36  /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
    37  		    void *raddr, void (*fnaddr)(void));
    38  
    39     Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
    40     for this function.  This has been allocated by ffi_call.  We also
    41     deallocate some of the stack that has been alloca'd.
        
           Register use, as read by the code below:
             %rdi  args    base of the register area; the six integer
                           argument slots are loaded from offsets 0..40.
             %rsi  bytes   ARGS+BYTES is where the local frame is built.
             %rdx  flags   saved at 0(frame); its low byte selects the
                           Lstore_table handler after the call.
             %rcx  raddr   saved at 8(frame); handed to the handler in %rdi.
             %r8   fnaddr  the function that gets called.
             %r9d  (not shown in the prototype above) number of SSE
                           registers; copied to %eax, and a non-zero value
                           makes Lload_sse run.
        
           Local frame built at ARGS+BYTES:
              0  flags      8  raddr     16  caller's %rbp
             24  copy of our return address  */
    42  
    43  	.align	3
    44  	.globl	_ffi_call_unix64
    45  
    46  _ffi_call_unix64:
    47  LUW0:
    48  	movq	(%rsp), %r10		/* Load return address.  */
    49  	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
    50  	movq	%rdx, (%rax)		/* Save flags.  */
    51  	movq	%rcx, 8(%rax)		/* Save raddr.  */
    52  	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
    53  	movq	%r10, 24(%rax)		/* Relocate return address.  */
    54  	movq	%rax, %rbp		/* Finalize local stack frame.  */
    55  LUW1:
        	/* From LUW1 on, the unwind info describes the frame relative
        	   to %rbp instead of %rsp.  */
    56  	movq	%rdi, %r10		/* Save a copy of the register area. */
    57  	movq	%r8, %r11		/* Save a copy of the target fn.  */
    58  	movl	%r9d, %eax		/* Set number of SSE registers.  */
    59  
    60  	/* Load up all argument registers.  */
        	/* %rdi, %rsi, %rcx, %r8 and %r9 are overwritten here, which is
        	   why the values still needed were copied to %r10/%r11/%eax.  */
    61  	movq	(%r10), %rdi
    62  	movq	8(%r10), %rsi
    63  	movq	16(%r10), %rdx
    64  	movq	24(%r10), %rcx
    65  	movq	32(%r10), %r8
    66  	movq	40(%r10), %r9
    67  	testl	%eax, %eax
    68  	jnz	Lload_sse		/* Only touch %xmm0-7 when the count is non-zero.  */
    69  Lret_from_load_sse:
    70  
    71  	/* Deallocate the reg arg area.  */
        	/* 176 = 6*8 + 8*16: the six integer slots read above plus the
        	   eight 16-byte slots read by Lload_sse.  */
    72  	leaq	176(%r10), %rsp
    73  
    74  	/* Call the user function.  */
    75  	call	*%r11
    76  
    77  	/* Deallocate stack arg area; local stack frame in redzone.  */
        	/* 24(%rbp) is where the return address was relocated to, so
        	   the `ret' of the selected Lst_* handler returns to our caller.  */
    78  	leaq	24(%rbp), %rsp
    79  
    80  	movq	0(%rbp), %rcx		/* Reload flags.  */
    81  	movq	8(%rbp), %rdi		/* Reload raddr.  */
    82  	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
    83  LUW2:
    84  
    85  	/* The first byte of the flags contains the FFI_TYPE.  */
        	/* Each Lstore_table entry is a 32-bit offset relative to the
        	   table itself: fetch entry [type], sign-extend it, add the
        	   table address and jump to the resulting handler.  */
    86  	movzbl	%cl, %r10d
    87  	leaq	Lstore_table(%rip), %r11
    88  	movslq	(%r11, %r10, 4), %r10
    89  	addq	%r11, %r10
    90  	jmp	*%r10
    91  
    92  Lstore_table:
        	/* Dispatch table used by ffi_call_unix64 after the call returns.
        	   It is indexed by the FFI_TYPE code held in the low byte of the
        	   flags.  Every entry is the 32-bit distance from Lstore_table to
        	   the handler that stores a return value of that type through
        	   raddr.  FFI_TYPE_INT shares the SINT32 handler; UINT64, SINT64
        	   and POINTER share Lst_int64.  */
    93  	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
    94  	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
    95  	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
    96  	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
    97  	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
    98  	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
    99  	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
   100  	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
   101  	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
   102  	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
   103  	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
   104  	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
   105  	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
   106  	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
   107  	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */
   108  
   109  	.text
   110  	.align	3
        
        	/* Return-value store handlers, entered through Lstore_table.
        
        	   State on entry (set up by the code preceding Lstore_table):
        	     %rdi  raddr, the address the result is written to
        	     %ecx  flags (only Lst_struct looks at them)
        	     %rax, %rdx, %xmm0, %xmm1, st(0)  as left by the called function
        	     %rsp  points at the relocated return address
        
        	   Integer results narrower than 64 bits are widened to a full
        	   quadword before being stored.  Every handler ends in its own
        	   `ret': the alignment padding between handlers does not stop
        	   execution, so a handler without one would run on into its
        	   neighbour.  */
   111  Lst_void:
   112  	ret
   113  	.align	3
   114  Lst_uint8:
   115  	movzbq	%al, %rax		/* Zero-extend 8 -> 64 bits.  */
   116  	movq	%rax, (%rdi)
   117  	ret
   118  	.align	3
   119  Lst_sint8:
   120  	movsbq	%al, %rax		/* Sign-extend 8 -> 64 bits.  */
   121  	movq	%rax, (%rdi)
   122  	ret
   123  	.align	3
   124  Lst_uint16:
   125  	movzwq	%ax, %rax		/* Zero-extend 16 -> 64 bits.  */
   126  	movq	%rax, (%rdi)
        	ret				/* Must not continue into Lst_sint16,
        					   which would store the value again
        					   sign-extended.  */
   127  	.align	3
   128  Lst_sint16:
   129  	movswq	%ax, %rax		/* Sign-extend 16 -> 64 bits.  */
   130  	movq	%rax, (%rdi)
   131  	ret
   132  	.align	3
   133  Lst_uint32:
   134  	movl	%eax, %eax		/* 32-bit move clears bits 32-63.  */
   135  	movq	%rax, (%rdi)
        	ret				/* Must not continue into Lst_sint32,
        					   whose cltq would sign-extend the
        					   value and store it again.  */
   136  	.align	3
   137  Lst_sint32:
   138  	cltq				/* Sign-extend %eax into %rax.  */
   139  	movq	%rax, (%rdi)
   140  	ret
   141  	.align	3
   142  Lst_int64:
   143  	movq	%rax, (%rdi)
   144  	ret
   145  	.align	3
   146  Lst_float:
   147  	movss	%xmm0, (%rdi)		/* Store exactly 4 bytes.  */
   148  	ret
   149  	.align	3
   150  Lst_double:
   151  	movsd	%xmm0, (%rdi)		/* Store exactly 8 bytes.  */
   152  	ret
        	.align	3
   153  Lst_ldouble:
   154  	fstpt	(%rdi)			/* Pop st(0) as an 80-bit value.  */
   155  	ret
   156  	.align	3
   157  Lst_struct:
   158  	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
   159  
   160  	/* We have to locate the values now, and since we don't want to
   161  	   write too much data into the user's return value, we spill the
   162  	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
   163  	   control where the values are located.  Only one of the three
   164  	   bits will be set; see ffi_prep_cif_machdep for the pattern.
        
        	   Effect of the conditional moves below (first word ends up in
        	   %rax, second word in %rdx):
        	     no bit set    %rax  / %rdx   (left untouched)
        	     bit 8  0x100  %xmm0 / %rax
        	     bit 9  0x200  %rax  / %xmm0
        	     bit 10 0x400  %xmm0 / %xmm1  */
   165  	movd	%xmm0, %r10
   166  	movd	%xmm1, %r11
   167  	testl	$0x100, %ecx
   168  	cmovnz	%rax, %rdx
   169  	cmovnz	%r10, %rax
   170  	testl	$0x200, %ecx
   171  	cmovnz	%r10, %rdx
   172  	testl	$0x400, %ecx
   173  	cmovnz	%r10, %rax
   174  	cmovnz	%r11, %rdx
   175  	movq	%rax, (%rsi)
   176  	movq	%rdx, 8(%rsi)
   177  
   178  	/* Bits 12-31 contain the true size of the structure.  Copy from
   179  	   the scratch area to the true destination.  */
        	/* After the shift %rcx is the byte count for rep movsb, which
        	   copies from (%rsi), the scratch area, to (%rdi), raddr.  */
   180  	shrl	$12, %ecx
   181  	rep movsb
   182  	ret
   183  
   184  	/* Many times we can avoid loading any SSE registers at all.
   185  	   It's not worth an indirect jump to load the exact set of
   186  	   SSE registers needed; zero or all is a good compromise.
        
        	   Entered from ffi_call_unix64 when the SSE register count in
        	   %eax is non-zero.  %r10 holds the base of the register area;
        	   the eight 16-byte SSE slots sit at offsets 48..160, directly
        	   after the six 8-byte integer slots.  movdqa is the aligned
        	   form of the load, so the slots are presumably 16-byte aligned
        	   by whoever built the area -- confirm against ffi_call.  */
   187  	.align	3
   188  LUW3:	/* Unwind info returns to the %rbp-based frame state from here.  */
   189  Lload_sse:
   190  	movdqa	48(%r10), %xmm0
   191  	movdqa	64(%r10), %xmm1
   192  	movdqa	80(%r10), %xmm2
   193  	movdqa	96(%r10), %xmm3
   194  	movdqa	112(%r10), %xmm4
   195  	movdqa	128(%r10), %xmm5
   196  	movdqa	144(%r10), %xmm6
   197  	movdqa	160(%r10), %xmm7
   198  	jmp	Lret_from_load_sse
   199  
   200  LUW4:	/* End of the address range covered by the ffi_call_unix64 FDE.  */
   201  	.align	3
   202  	.globl	_ffi_closure_unix64
   203  
        /* ffi_closure_unix64
        
           Spills the incoming argument registers into a 200-byte stack frame,
           calls ffi_closure_unix64_inner, then dispatches through Lload_table
           on the low byte of the value returned in %eax so that the result
           ends up in the right return register(s).
        
           Frame layout, offsets from %rsp after the allocation:
               0..40    %rdi %rsi %rdx %rcx %r8 %r9
              48..160   %xmm0..%xmm7 (written by Lsave_sse, only when CF is set)
             176        area whose address is passed in %rsi; after the frame
                        is released the Lld_* handlers read the return value
                        from the same bytes at -24(%rsp)
        
           Arguments handed to ffi_closure_unix64_inner:
             %rdi  %r10 as received on entry (presumably the closure pointer
                   supplied by the trampoline -- confirm against the caller)
             %rsi  176(%rsp), the return value area
             %rdx  %rsp, the register save area
             %rcx  208(%rsp) = entry %rsp + 8, the address just above our
                   return address (presumably the caller's stack arguments)  */
   204  _ffi_closure_unix64:
   205  LUW5:
   206  	/* The carry flag is set by the trampoline iff SSE registers
   207  	   are used.  Don't clobber it before the branch instruction.  */
        	/* Hence leaq rather than subq for the allocation: leaq and the
        	   movq stores below leave the flags alone.  */
   208  	leaq    -200(%rsp), %rsp
   209  LUW6:
   210  	movq	%rdi, (%rsp)
   211  	movq    %rsi, 8(%rsp)
   212  	movq    %rdx, 16(%rsp)
   213  	movq    %rcx, 24(%rsp)
   214  	movq    %r8, 32(%rsp)
   215  	movq    %r9, 40(%rsp)
   216  	jc      Lsave_sse
   217  Lret_from_save_sse:
   218  
   219  	movq	%r10, %rdi
   220  	leaq	176(%rsp), %rsi
   221  	movq	%rsp, %rdx
   222  	leaq	208(%rsp), %rcx
   223  	call	_ffi_closure_unix64_inner
   224  
   225  	/* Deallocate stack frame early; return value is now in redzone.  */
        	/* 176 - 200 = -24, which is the offset the Lld_* handlers use.  */
   226  	addq	$200, %rsp
   227  LUW7:
   228  
   229  	/* The first byte of the return value contains the FFI_TYPE.  */
        	/* Each Lload_table entry is a 32-bit offset relative to the
        	   table itself: fetch entry [type], sign-extend it, add the
        	   table address and jump to the resulting handler.  */
   230  	movzbl	%al, %r10d
   231  	leaq	Lload_table(%rip), %r11
   232  	movslq	(%r11, %r10, 4), %r10
   233  	addq	%r11, %r10
   234  	jmp	*%r10
   235  
   236  Lload_table:
        	/* Dispatch table used by ffi_closure_unix64 once the inner
        	   function has returned.  It is indexed by the FFI_TYPE code in
        	   the low byte of %eax.  Every entry is the 32-bit distance from
        	   Lload_table to the handler that loads a return value of that
        	   type into the return register(s).  Signed and unsigned
        	   variants of the same width share one handler.  */
   237  	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
   238  	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
   239  	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
   240  	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
   241  	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
   242  	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
   243  	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
   244  	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
   245  	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
   246  	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
   247  	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
   248  	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
   249  	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
   250  	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
   251  	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */
   252  
   253  	.text
   254  	.align	3
        
        	/* Return-value load handlers, entered through Lload_table.
        
        	   State on entry: the 200-byte frame has already been released,
        	   so the return value area sits at -24(%rsp) (second word at
        	   -16(%rsp)) and %rsp points at the return address; %eax still
        	   holds the value returned by ffi_closure_unix64_inner.  Each
        	   handler loads the value into the return register(s) and
        	   returns.  */
   255  Lld_void:
   256  	ret
   257  	.align	3
   258  Lld_int8:
        	/* Shared by UINT8 and SINT8, so signed bytes are zero-extended
        	   as well.  NOTE(review): presumably fine because the caller
        	   only consumes the low 8 bits -- confirm.  */
   259  	movzbl	-24(%rsp), %eax
   260  	ret
   261  	.align	3
   262  Lld_int16:
        	/* Shared by UINT16 and SINT16; same zero-extension remark as
        	   for Lld_int8.  */
   263  	movzwl	-24(%rsp), %eax
   264  	ret
   265  	.align	3
   266  Lld_int32:
   267  	movl	-24(%rsp), %eax
   268  	ret
   269  	.align	3
   270  Lld_int64:
   271  	movq	-24(%rsp), %rax
   272  	ret
   273  	.align	3
   274  Lld_float:
   275  	movss	-24(%rsp), %xmm0
   276  	ret
   277  	.align	3
   278  Lld_double:
   279  	movsd	-24(%rsp), %xmm0
   280  	ret
   281  	.align	3
   282  Lld_ldouble:
   283  	fldt	-24(%rsp)		/* Push the 80-bit value onto the x87 stack.  */
   284  	ret
   285  	.align	3
   286  Lld_struct:
   287  	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
   288  	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
   289  	   both rdx and xmm1 with the second word.  For the remaining,
   290  	   bit 8 set means xmm0 gets the second word, and bit 9 means
   291  	   that rax gets the second word.  */
   292  	movq	-24(%rsp), %rcx		/* %rcx = first word.  */
   293  	movq	-16(%rsp), %rdx		/* %rdx = second word.  */
   294  	movq	-16(%rsp), %xmm1	/* %xmm1 = second word.  */
   295  	testl	$0x100, %eax
   296  	cmovnz	%rdx, %rcx		/* Bit 8: pick the second word instead.  */
   297  	movd	%rcx, %xmm0
   298  	testl	$0x200, %eax
        	/* The load below overwrites %eax, but a mov leaves the flags
        	   alone, so the cmovnz still acts on the bit 9 test.  */
   299  	movq	-24(%rsp), %rax		/* %rax = first word.  */
   300  	cmovnz	%rdx, %rax		/* Bit 9: pick the second word instead.  */
   301  	ret
   302  
   303  	/* See the comment above Lload_sse; the same logic applies here.  */
        	/* Entered from ffi_closure_unix64 when the carry flag is set.
        	   Stores %xmm0..%xmm7 into the 16-byte slots at 48..160 of the
        	   frame, directly after the six integer slots, then resumes at
        	   Lret_from_save_sse.  movdqa is the aligned form of the store,
        	   so %rsp is presumably 16-byte aligned at this point --
        	   confirm against the trampoline's caller.  */
   304  	.align	3
   305  LUW8:	/* Unwind info returns to the frame-allocated state from here.  */
   306  Lsave_sse:
   307  	movdqa	%xmm0, 48(%rsp)
   308  	movdqa	%xmm1, 64(%rsp)
   309  	movdqa	%xmm2, 80(%rsp)
   310  	movdqa	%xmm3, 96(%rsp)
   311  	movdqa	%xmm4, 112(%rsp)
   312  	movdqa	%xmm5, 128(%rsp)
   313  	movdqa	%xmm6, 144(%rsp)
   314  	movdqa	%xmm7, 160(%rsp)
   315  	jmp	Lret_from_save_sse
   316  
   317  LUW9:	/* End of the address range covered by the ffi_closure_unix64 FDE.  */
   318  .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
        	/* Hand-written unwind information: one CIE shared by two FDEs,
        	   one for ffi_call_unix64 (LUW0..LUW4) and one for
        	   ffi_closure_unix64 (LUW5..LUW9).  The LUWn labels in the code
        	   above mark the points where the frame description changes.  */
   319  EH_frame1:
   320  	.set	L$set$0,LECIE1-LSCIE1		/* CIE Length */
   321  	.long	L$set$0
   322  LSCIE1:
   323  	.long	0x0		/* CIE Identifier Tag */
   324  	.byte	0x1		/* CIE Version */
   325  	.ascii	"zR\0"		/* CIE Augmentation */
   326  	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
   327  	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
   328  	.byte	0x10		/* CIE RA Column */
   329  	.byte	0x1		/* uleb128 0x1; Augmentation size */
   330  	.byte	0x10		/* FDE Encoding (pcrel | absptr): 8-byte
        				   pc-relative fields, matching the .quad
        				   entries in the FDEs below */
   331  	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
   332  	.byte	0x7		/* uleb128 0x7 */
   333  	.byte	0x8		/* uleb128 0x8 */
   334  	.byte	0x90		/* DW_CFA_offset, column 0x10 */
   335  	.byte	0x1		/* uleb128 0x1; times the -8 data alignment
        				   factor = return address at CFA-8 */
   336  	.align	3
   337  LECIE1:
   338  	.globl _ffi_call_unix64.eh
   339  _ffi_call_unix64.eh:
   340  LSFDE1:
   341  	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
   342  	.long	L$set$1
   343  LASFDE1:
   344  	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
   345  	.quad	LUW0-.			/* FDE initial location */
   346  	.set	L$set$2,LUW4-LUW0	/* FDE address range */
   347  	.quad	L$set$2
   348  	.byte	0x0			/* Augmentation size */
   349  	.byte	0x4			/* DW_CFA_advance_loc4 */
   350  	.set	L$set$3,LUW1-LUW0
   351  	.long	L$set$3
   352  
   353  	/* New stack frame based off rbp.  This is an itty bit of unwind
   354  	   trickery in that the CFA *has* changed.  There is no easy way
   355  	   to describe it correctly on entry to the function.  Fortunately,
   356  	   it doesn't matter too much since at all points we can correctly
   357  	   unwind back to ffi_call.  Note that the location to which we
   358  	   moved the return address is (the new) CFA-8, so from the
   359  	   perspective of the unwind info, it hasn't moved.  */
        	/* With CFA = %rbp + 32 the relocated return address at 24(%rbp)
        	   is CFA-8 and the saved %rbp at 16(%rbp) is CFA-16.  */
   360  	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
   361  	.byte	0x6
   362  	.byte	0x20
   363  	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
   364  	.byte	0x2
   365  	.byte	0xa			/* DW_CFA_remember_state */
   366  
        	/* LUW2: %rsp points at the return address again and %rbp has
        	   been reloaded, so go back to the plain entry description.  */
   367  	.byte	0x4			/* DW_CFA_advance_loc4 */
   368  	.set	L$set$4,LUW2-LUW1
   369  	.long	L$set$4
   370  	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
   371  	.byte	0x7
   372  	.byte	0x8
   373  	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
   374  
        	/* LUW3: Lload_sse runs while the %rbp-based frame is live, so
        	   bring back the state remembered above.  */
   375  	.byte	0x4			/* DW_CFA_advance_loc4 */
   376  	.set	L$set$5,LUW3-LUW2
   377  	.long	L$set$5
   378  	.byte	0xb			/* DW_CFA_restore_state */
   379  
   380  	.align	3
   381  LEFDE1:
   382  	.globl _ffi_closure_unix64.eh
   383  _ffi_closure_unix64.eh:
   384  LSFDE3:
   385  	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
   386  	.long	L$set$6
   387  LASFDE3:
   388  	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
   389  	.quad	LUW5-.			/* FDE initial location */
   390  	.set	L$set$7,LUW9-LUW5	/* FDE address range */
   391  	.quad	L$set$7
   392  	.byte	0x0			/* Augmentation size */
   393  
        	/* LUW6: the 200-byte frame has been allocated, so the CFA is
        	   now %rsp + 200 + 8.  */
   394  	.byte	0x4			/* DW_CFA_advance_loc4 */
   395  	.set	L$set$8,LUW6-LUW5
   396  	.long	L$set$8
   397  	.byte	0xe			/* DW_CFA_def_cfa_offset */
   398  	.byte	208,1			/* uleb128 208 */
   399  	.byte	0xa			/* DW_CFA_remember_state */
   400  
        	/* LUW7: the frame has been released; CFA is %rsp + 8 again.  */
   401  	.byte	0x4			/* DW_CFA_advance_loc4 */
   402  	.set	L$set$9,LUW7-LUW6
   403  	.long	L$set$9
   404  	.byte	0xe			/* DW_CFA_def_cfa_offset */
   405  	.byte	0x8
   406  
        	/* LUW8: Lsave_sse runs with the frame allocated, so bring back
        	   the state remembered above.  */
   407  	.byte	0x4			/* DW_CFA_advance_loc4 */
   408  	.set	L$set$10,LUW8-LUW7
   409  	.long	L$set$10
   410  	.byte	0xb			/* DW_CFA_restore_state */
   411  
   412  	.align	3
   413  LEFDE3:
   414  	.subsections_via_symbols
   415  
   416  #endif /* __x86_64__ */