github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libffi/src/aarch64/ffi.c (about)

     1  /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
     2  
     3  Permission is hereby granted, free of charge, to any person obtaining
     4  a copy of this software and associated documentation files (the
     5  ``Software''), to deal in the Software without restriction, including
     6  without limitation the rights to use, copy, modify, merge, publish,
     7  distribute, sublicense, and/or sell copies of the Software, and to
     8  permit persons to whom the Software is furnished to do so, subject to
     9  the following conditions:
    10  
    11  The above copyright notice and this permission notice shall be
    12  included in all copies or substantial portions of the Software.
    13  
    14  THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
    15  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    16  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    17  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    18  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    19  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    20  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
    21  
    22  #include <stdio.h>
    23  #include <stdlib.h>
    24  #include <stdint.h>
    25  #include <ffi.h>
    26  #include <ffi_common.h>
    27  #include "internal.h"
    28  
    29  /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
    30     all further uses in this file will refer to the 128-bit type.  */
    31  #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    32  # if FFI_TYPE_LONGDOUBLE != 4
    33  #  error FFI_TYPE_LONGDOUBLE out of date
    34  # endif
    35  #else
    36  # undef FFI_TYPE_LONGDOUBLE
    37  # define FFI_TYPE_LONGDOUBLE 4
    38  #endif
    39  
/* View of one 64-bit register image: a whole doubleword, or a pair of
   32-bit words (used when handling single-precision HFA elements).  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};
    45  
/* Image of one 128-bit vector (V) register; 16-byte aligned so the
   q-register load/store sequences in this file can address it.  */
struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};
    50  
/* Register save area exchanged with the assembly stubs: the argument/
   return vector registers followed by the argument/return X registers.
   NOTE(review): layout presumably mirrors what ffi_call_SYSV and the
   closure entry stubs save/restore — confirm against sysv.S.  */
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
    56  
#if defined (__clang__) && defined (__APPLE__)
extern void sys_icache_invalidate (void *start, size_t len);
#endif

/* Flush the instruction cache for [start, end) so freshly written
   trampoline code becomes visible to instruction fetch.  Uses the
   Darwin kernel service under Apple clang, the GCC builtin elsewhere;
   any other toolchain is a hard build error.  */
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
    72  
    73  /* A subroutine of is_vfp_type.  Given a structure type, return the type code
    74     of the first non-structure element.  Recurse for structure elements.
    75     Return -1 if the structure is in fact empty, i.e. no nested elements.  */
    76  
    77  static int
    78  is_hfa0 (const ffi_type *ty)
    79  {
    80    ffi_type **elements = ty->elements;
    81    int i, ret = -1;
    82  
    83    if (elements != NULL)
    84      for (i = 0; elements[i]; ++i)
    85        {
    86          ret = elements[i]->type;
    87          if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
    88            {
    89              ret = is_hfa0 (elements[i]);
    90              if (ret < 0)
    91                continue;
    92            }
    93          break;
    94        }
    95  
    96    return ret;
    97  }
    98  
    99  /* A subroutine of is_vfp_type.  Given a structure type, return true if all
   100     of the non-structure elements are the same as CANDIDATE.  */
   101  
   102  static int
   103  is_hfa1 (const ffi_type *ty, int candidate)
   104  {
   105    ffi_type **elements = ty->elements;
   106    int i;
   107  
   108    if (elements != NULL)
   109      for (i = 0; elements[i]; ++i)
   110        {
   111          int t = elements[i]->type;
   112          if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
   113            {
   114              if (!is_hfa1 (elements[i], candidate))
   115                return 0;
   116            }
   117          else if (t != candidate)
   118            return 0;
   119        }
   120  
   121    return 1;
   122  }
   123  
/* Determine if TY may be allocated to the FP registers.  This is both an
   fp scalar type as well as an homogenous floating point aggregate (HFA).
   That is, a structure consisting of 1 to 4 members of all the same type,
   where that type is an fp scalar.

   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_*
   constant for the type.  */

static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first.  */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      /* A bare fp scalar is a degenerate one-element "HFA".  */
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      /* A complex is two scalars of its element type.  */
      candidate = ty->elements[0]->type;
      switch (candidate)
	{
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	  ele_count = 2;
	  goto done;
	}
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  Empty nested
     aggregates (is_hfa0 returns -1) are skipped over.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      for (i = 0; ; ++i)
        {
          candidate = is_hfa0 (elements[i]);
          if (candidate >= 0)
            break;
        }
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure: it must be an
     exact multiple of the element size.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
        return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
        return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
        return 0;
      break;
    default:
      return 0;
    }
  /* An HFA has at most four elements.  */
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type.  */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
        {
          if (!is_hfa1 (elements[i], candidate))
            return 0;
        }
      else if (t != candidate)
        return 0;
    }

  /* All tests succeeded.  Encode the result: scalar type code times 4,
     plus (4 - count).  NOTE(review): this packing presumably matches
     the AARCH64_RET_S4..Q1 constants in internal.h — confirm.  */
 done:
  return candidate * 4 + (4 - ele_count);
}
   225  
/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS. */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number. */
  unsigned nsrn;                /* Next vector register number. */
  size_t nsaa;                  /* Next stack offset. */

#if defined (__APPLE__)
  unsigned allocating_variadic; /* Non-zero once past the fixed arguments
                                   of a variadic call; from then on all
                                   arguments go to the stack (see
                                   ffi_call_int).  */
#endif
};
   242  
   243  /* Initialize a procedure call argument marshalling state.  */
   244  static void
   245  arg_init (struct arg_state *state)
   246  {
   247    state->ngrn = 0;
   248    state->nsrn = 0;
   249    state->nsaa = 0;
   250  #if defined (__APPLE__)
   251    state->allocating_variadic = 0;
   252  #endif
   253  }
   254  
   255  /* Allocate an aligned slot on the stack and return a pointer to it.  */
   256  static void *
   257  allocate_to_stack (struct arg_state *state, void *stack,
   258  		   size_t alignment, size_t size)
   259  {
   260    size_t nsaa = state->nsaa;
   261  
   262    /* Round up the NSAA to the larger of 8 or the natural
   263       alignment of the argument's type.  */
   264  #if defined (__APPLE__)
   265    if (state->allocating_variadic && alignment < 8)
   266      alignment = 8;
   267  #else
   268    if (alignment < 8)
   269      alignment = 8;
   270  #endif
   271      
   272    nsaa = ALIGN (nsaa, alignment);
   273    state->nsaa = nsaa + size;
   274  
   275    return (char *)stack + nsaa;
   276  }
   277  
   278  static ffi_arg
   279  extend_integer_type (void *source, int type)
   280  {
   281    switch (type)
   282      {
   283      case FFI_TYPE_UINT8:
   284        return *(UINT8 *) source;
   285      case FFI_TYPE_SINT8:
   286        return *(SINT8 *) source;
   287      case FFI_TYPE_UINT16:
   288        return *(UINT16 *) source;
   289      case FFI_TYPE_SINT16:
   290        return *(SINT16 *) source;
   291      case FFI_TYPE_UINT32:
   292        return *(UINT32 *) source;
   293      case FFI_TYPE_INT:
   294      case FFI_TYPE_SINT32:
   295        return *(SINT32 *) source;
   296      case FFI_TYPE_UINT64:
   297      case FFI_TYPE_SINT64:
   298        return *(UINT64 *) source;
   299        break;
   300      case FFI_TYPE_POINTER:
   301        return *(uintptr_t *) source;
   302      default:
   303        abort();
   304      }
   305  }
   306  
/* Scatter the elements of an HFA argument at SRC into the vector
   register save area at DEST, one full 16-byte register image per
   element.  H is the AARCH64_RET_* code from is_vfp_type, encoding
   both element kind (S/D/Q) and element count.  */
static void
extend_hfa_type (void *dest, void *src, int h)
{
  /* Index of the 3-instruction (12-byte) stanza to enter below; the
     stanzas are laid out in order S4,S3,S2,S1, D4..D1, Q4..Q1,
     matching the AARCH64_RET_* numbering from AARCH64_RET_S4.  */
  int f = h - AARCH64_RET_S4;
  void *x0;

  /* Computed branch into the stanza table: each stanza loads the
     right number/width of elements into v16-v19 and branches into the
     shared tail, which stores each element out as a full q register.  */
  asm volatile (
	"adr	%0, 0f\n"
"	add	%0, %0, %1\n"
"	br	%0\n"
"0:	ldp	s16, s17, [%3]\n"	/* S4 */
"	ldp	s18, s19, [%3, #8]\n"
"	b	4f\n"
"	ldp	s16, s17, [%3]\n"	/* S3 */
"	ldr	s18, [%3, #8]\n"
"	b	3f\n"
"	ldp	s16, s17, [%3]\n"	/* S2 */
"	b	2f\n"
"	nop\n"
"	ldr	s16, [%3]\n"		/* S1 */
"	b	1f\n"
"	nop\n"
"	ldp	d16, d17, [%3]\n"	/* D4 */
"	ldp	d18, d19, [%3, #16]\n"
"	b	4f\n"
"	ldp	d16, d17, [%3]\n"	/* D3 */
"	ldr	d18, [%3, #16]\n"
"	b	3f\n"
"	ldp	d16, d17, [%3]\n"	/* D2 */
"	b	2f\n"
"	nop\n"
"	ldr	d16, [%3]\n"		/* D1 */
"	b	1f\n"
"	nop\n"
"	ldp	q16, q17, [%3]\n"	/* Q4 */
"	ldp	q18, q19, [%3, #16]\n"
"	b	4f\n"
"	ldp	q16, q17, [%3]\n"	/* Q3 */
"	ldr	q18, [%3, #16]\n"
"	b	3f\n"
"	ldp	q16, q17, [%3]\n"	/* Q2 */
"	b	2f\n"
"	nop\n"
"	ldr	q16, [%3]\n"		/* Q1 */
"	b	1f\n"
"4:	str	q19, [%2, #48]\n"
"3:	str	q18, [%2, #32]\n"
"2:	str	q17, [%2, #16]\n"
"1:	str	q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)	/* 12 = stanza size in bytes */
    : "memory", "v16", "v17", "v18", "v19");
}
   360  
/* Inverse of extend_hfa_type: gather an HFA spread across 16-byte
   register slots at REG back into its packed in-memory layout at DEST.
   H is the AARCH64_RET_* code.  Returns the address of the packed
   value (DEST, possibly offset for big-endian single-element cases).
   DEST == REG requests in-place repacking.  */
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the float sits in the high word of the slot.
	     NOTE: void* arithmetic is a GNU extension.  */
	  dest += 12;
#endif
	}
      else
	*(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      /* Interleave lane 0 of each register into packed floats.  */
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.s, v17.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the double sits in the high half of the slot.  */
	  dest += 8;
#endif
	}
      else
	*(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.d, v17.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      /* Q elements are already contiguous 16-byte slots: a straight
	 copy of (element count) * 16 bytes suffices.  */
      if (dest != reg)
	return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
   429  
   430  /* Either allocate an appropriate register for the argument type, or if
   431     none are available, allocate a stack slot and return a pointer
   432     to the allocated space.  */
   433  
   434  static void *
   435  allocate_int_to_reg_or_stack (struct call_context *context,
   436  			      struct arg_state *state,
   437  			      void *stack, size_t size)
   438  {
   439    if (state->ngrn < N_X_ARG_REG)
   440      return &context->x[state->ngrn++];
   441  
   442    state->ngrn = N_X_ARG_REG;
   443    return allocate_to_stack (state, stack, size, size);
   444  }
   445  
/* Machine-dependent cif processing: encode the return type as an
   AARCH64_RET_* dispatch code in cif->flags (or'ing in
   AARCH64_FLAG_ARG_V when any argument is passed in the vector
   registers) and round cif->bytes up to the 16-byte stack alignment.  */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      /* 32-bit pointers (ILP32) are returned as UINT32.  */
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
	{
	  /* Not an HFA: returned in integer registers or in memory.  */
	  size_t s = rtype->size;
	  if (s > 16)
	    {
	      /* Returned via hidden pointer; reserve 8 more bytes of
		 stack — NOTE(review): presumably to preserve that
		 pointer across the call; confirm against sysv.S.  */
	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
	      bytes += 8;
	    }
	  else if (s == 16)
	    flags = AARCH64_RET_INT128;
	  else if (s == 8)
	    flags = AARCH64_RET_INT64;
	  else
	    /* Small odd-sized aggregate: fetch as int128, then copy
	       only rtype->size bytes out to the caller's buffer.  */
	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
	}
      break;

    default:
      abort();
    }

  /* One pass over the argument types: if any argument travels in the
     vector registers, the asm must save/restore them.  */
  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
	flags |= AARCH64_FLAG_ARG_V;
	break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement. */
  cif->bytes = ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  /* Not variadic by default; ffi_prep_cif_machdep_var overrides.  */
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
   528  
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls: do the
   common machdep setup, then record how many leading arguments are
   fixed — everything after them is marshalled with the Darwin
   variadic rules (see arg_state.allocating_variadic).  NTOTALARGS is
   unused here.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
				    unsigned int nfixedargs,
				    unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
#endif /* __APPLE__ */
   540  
   541  extern void ffi_call_SYSV (struct call_context *context, void *frame,
   542  			   void (*fn)(void), void *rvalue, int flags,
   543  			   void *closure) FFI_HIDDEN;
   544  
/* Call a function with the provided arguments and capture the return
   value.  CLOSURE, when non-null, is passed through to ffi_call_SYSV
   as the static chain (Go closures).  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
	      void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
	rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    /* Strip the RET code, keeping only the ARG_V bit.  */
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need: the
     register context, the outgoing stack arguments, 32 bytes of frame
     for ffi_call_SYSV, and (if needed) a local return buffer.
     NOTE: arithmetic on void* below is a GNU extension.  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = stack + stack_bytes;
  rvalue = (rsize ? frame + 32 : orig_rvalue);

  /* Marshal each argument into the register context or stack area.  */
  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	do_pointer:
	  {
	    ffi_arg ext = extend_integer_type (a, t);
	    if (state.ngrn < N_X_ARG_REG)
	      context->x[state.ngrn++] = ext;
	    else
	      {
		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
		state.ngrn = N_X_ARG_REG;
		/* Note that the default abi extends each argument
		   to a full 64-bit slot, while the iOS abi allocates
		   only enough space. */
#ifdef __APPLE__
		memcpy(d, a, s);
#else
		*(ffi_arg *)d = ext;
#endif
	      }
	  }
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  {
	    void *dest;

	    h = is_vfp_type (ty);
	    if (h)
	      {
		/* HFA: needs (4 - (h & 3)) consecutive V registers,
		   or the stack if not enough remain.  */
		int elems = 4 - (h & 3);
	        if (state.nsrn + elems <= N_V_ARG_REG)
		  {
		    dest = &context->v[state.nsrn];
		    state.nsrn += elems;
		    extend_hfa_type (dest, a, h);
		    break;
		  }
		state.nsrn = N_V_ARG_REG;
		dest = allocate_to_stack (&state, stack, ty->alignment, s);
	      }
	    else if (s > 16)
	      {
		/* If the argument is a composite type that is larger than 16
		   bytes, then the argument has been copied to memory, and
		   the argument is replaced by a pointer to the copy.  */
		a = &avalue[i];
		t = FFI_TYPE_POINTER;
		goto do_pointer;
	      }
	    else
	      {
		size_t n = (s + 7) / 8;
		if (state.ngrn + n <= N_X_ARG_REG)
		  {
		    /* If the argument is a composite type and the size in
		       double-words is not more than the number of available
		       X registers, then the argument is copied into
		       consecutive X registers.  */
		    dest = &context->x[state.ngrn];
		    state.ngrn += n;
		  }
		else
		  {
		    /* Otherwise, there are insufficient X registers. Further
		       X register allocations are prevented, the NSAA is
		       adjusted and the argument is copied to memory at the
		       adjusted NSAA.  */
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
		}
	      memcpy (dest, a, s);
	    }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      /* Once past the fixed arguments of a variadic call, force all
	 remaining arguments to the stack per the Darwin ABI.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  /* Small odd-sized aggregate was fetched into the 16-byte local
     buffer; copy only the real size to the caller's buffer.  */
  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}
   707  
/* Public entry point: call FN per CIF with arguments AVALUE, storing
   the return value through RVALUE (no static-chain closure).  */
void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
   713  
#ifdef FFI_GO_CLOSURES
/* Like ffi_call, but pass CLOSURE through as the static chain for a
   Go closure call.  */
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */
   722  
   723  /* Build a trampoline.  */
   724  
   725  extern void ffi_closure_SYSV (void) FFI_HIDDEN;
   726  extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
   727  
/* Fill in CLOSURE: record the cif/fun/user_data triple and write an
   executable trampoline into closure->tramp that jumps to the proper
   assembly entry stub.  Returns FFI_BAD_ABI for any ABI other than
   the AArch64 SysV one.  */
ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  /* 12 bytes of code (plus 4 bytes zero padding): load the stub
     address stored at tramp+16 into x16, put the trampoline's own
     address (used by the stub to locate this closure) into x17,
     and jump.  */
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;
  void (*start)(void);

  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  memcpy (tramp, trampoline, sizeof(trampoline));

  /* Select the stub that also saves the V registers when any argument
     is passed in them.  */
  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;
  /* Store the stub address in the slot the trampoline's ldr reads.
     NOTE(review): assumes FFI_TRAMPOLINE_SIZE covers tramp+16..+24 —
     confirm against ffitarget.h.  */
  *(UINT64 *)(tramp + 16) = (uintptr_t)start;

  /* Make the freshly written code visible to instruction fetch.  */
  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);

  return FFI_OK;
}
   762  
   763  #ifdef FFI_GO_CLOSURES
   764  extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
   765  extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
   766  
   767  ffi_status
   768  ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
   769                       void (*fun)(ffi_cif*,void*,void**,void*))
   770  {
   771    void (*start)(void);
   772  
   773    if (cif->abi != FFI_SYSV)
   774      return FFI_BAD_ABI;
   775  
   776    if (cif->flags & AARCH64_FLAG_ARG_V)
   777      start = ffi_go_closure_SYSV_V;
   778    else
   779      start = ffi_go_closure_SYSV;
   780  
   781    closure->tramp = start;
   782    closure->cif = cif;
   783    closure->fun = fun;
   784  
   785    return FFI_OK;
   786  }
   787  #endif /* FFI_GO_CLOSURES */
   788  
   789  /* Primary handler to setup and invoke a function within a closure.
   790  
   791     A closure when invoked enters via the assembler wrapper
   792     ffi_closure_SYSV(). The wrapper allocates a call context on the
   793     stack, saves the interesting registers (from the perspective of
   794     the calling convention) into the context then passes control to
   795     ffi_closure_SYSV_inner() passing the saved context and a pointer to
   796     the stack at the point ffi_closure_SYSV() was invoked.
   797  
   798     On the return path the assembler wrapper will reload call context
   799     registers.
   800  
   801     ffi_closure_SYSV_inner() marshalls the call context into ffi value
   802     descriptors, invokes the wrapped function, then marshalls the return
   803     value back into the call context.  */
   804  
/* Unmarshal the saved CONTEXT/STACK into an avalue array, invoke the
   user function FUN, and return cif->flags so the assembly wrapper
   knows how to reload the return value into registers.  */
int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
			void (*fun)(ffi_cif*,void*,void**,void*),
			void *user_data,
			struct call_context *context,
			void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  /* Walk the argument types in PCS order, pointing avalue[i] at where
     each incoming argument was saved: an X register slot, a V
     register slot (after repacking an HFA in place), or the caller's
     stack.  */
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  h = is_vfp_type (ty);
	  if (h)
	    {
	      /* HFA in (4 - (h & 3)) consecutive V register slots.  */
	      n = 4 - (h & 3);
	      if (state.nsrn + n <= N_V_ARG_REG)
		{
		  void *reg = &context->v[state.nsrn];
		  state.nsrn += n;

		  /* Eeek! We need a pointer to the structure, however the
		     homogeneous float elements are being passed in individual
		     registers, therefore for float and double the structure
		     is not represented as a contiguous sequence of bytes in
		     our saved register context.  We don't need the original
		     contents of the register storage, so we reformat the
		     structure into the same memory.  */
		  avalue[i] = compress_hfa_type (reg, reg, h);
		}
	      else
		{
		  state.nsrn = N_V_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  else if (s > 16)
	    {
	      /* Replace Composite type of size greater than 16 with a
		 pointer.  */
	      avalue[i] = *(void **)
		allocate_int_to_reg_or_stack (context, &state, stack,
					      sizeof (void *));
	    }
	  else
	    {
	      /* Small composite: saved directly in consecutive X
		 register slots, or on the stack.  */
	      n = (s + 7) / 8;
	      if (state.ngrn + n <= N_X_ARG_REG)
		{
		  avalue[i] = &context->x[state.ngrn];
		  state.ngrn += n;
		}
	      else
		{
		  state.ngrn = N_X_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  break;

	default:
	  abort();
	}
    }

  flags = cif->flags;
  /* For in-memory returns, write through the hidden result pointer
     the assembly wrapper captured (STRUCT_RVALUE) rather than the
     register return buffer.  */
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  return flags;
}