github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libffi/src/x86/ffi64.c (about)

     1  /* -----------------------------------------------------------------------
     2     ffi64.c - Copyright (c) 2013  The Written Word, Inc.
     3               Copyright (c) 2011  Anthony Green
     4               Copyright (c) 2008, 2010  Red Hat, Inc.
     5               Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
     6  
     7     x86-64 Foreign Function Interface
     8  
     9     Permission is hereby granted, free of charge, to any person obtaining
    10     a copy of this software and associated documentation files (the
    11     ``Software''), to deal in the Software without restriction, including
    12     without limitation the rights to use, copy, modify, merge, publish,
    13     distribute, sublicense, and/or sell copies of the Software, and to
    14     permit persons to whom the Software is furnished to do so, subject to
    15     the following conditions:
    16  
    17     The above copyright notice and this permission notice shall be included
    18     in all copies or substantial portions of the Software.
    19  
    20     THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
    21     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    22     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    23     NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    24     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    25     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    26     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    27     DEALINGS IN THE SOFTWARE.
    28     ----------------------------------------------------------------------- */
    29  
    30  #include <ffi.h>
    31  #include <ffi_common.h>
    32  
    33  #include <stdlib.h>
    34  #include <stdarg.h>
    35  #include <stdint.h>
    36  #include "internal64.h"
    37  
    38  #ifdef __x86_64__
    39  
    40  #define MAX_GPR_REGS 6
    41  #define MAX_SSE_REGS 8
    42  
    43  #if defined(__INTEL_COMPILER)
    44  #include "xmmintrin.h"
    45  #define UINT128 __m128
    46  #else
    47  #if defined(__SUNPRO_C)
    48  #include <sunmedia_types.h>
    49  #define UINT128 __m128i
    50  #else
    51  #define UINT128 __int128_t
    52  #endif
    53  #endif
    54  
    55  union big_int_union
    56  {
    57    UINT32 i32;
    58    UINT64 i64;
    59    UINT128 i128;
    60  };
    61  
    62  struct register_args
    63  {
    64    /* Registers for argument passing.  */
    65    UINT64 gpr[MAX_GPR_REGS];
    66    union big_int_union sse[MAX_SSE_REGS];
    67    UINT64 rax;	/* ssecount */
    68    UINT64 r10;	/* static chain */
    69  };
    70  
    71  extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
    72  			     void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
    73  
    74  /* All reference to register classes here is identical to the code in
    75     gcc/config/i386/i386.c. Do *not* change one without the other.  */
    76  
    77  /* Register class used for passing given 64bit part of the argument.
    78     These represent classes as documented by the PS ABI, with the
    79     exception of SSESF, SSEDF classes, that are basically SSE class,
    80     just gcc will use SF or DFmode move instead of DImode to avoid
    81     reformatting penalties.
    82  
    83     Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
    84     whenever possible (upper half does contain padding).  */
    85  enum x86_64_reg_class
    86    {
    87      X86_64_NO_CLASS,
    88      X86_64_INTEGER_CLASS,
    89      X86_64_INTEGERSI_CLASS,
    90      X86_64_SSE_CLASS,
    91      X86_64_SSESF_CLASS,
    92      X86_64_SSEDF_CLASS,
    93      X86_64_SSEUP_CLASS,
    94      X86_64_X87_CLASS,
    95      X86_64_X87UP_CLASS,
    96      X86_64_COMPLEX_X87_CLASS,
    97      X86_64_MEMORY_CLASS
    98    };
    99  
   100  #define MAX_CLASSES 4
   101  
   102  #define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
   103  
   104  /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   105     of this code is to classify each 8bytes of incoming argument by the register
   106     class and assign registers accordingly.  */
   107  
   108  /* Return the union class of CLASS1 and CLASS2.
   109     See the x86-64 PS ABI for details.  */
   110  
   111  static enum x86_64_reg_class
   112  merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
   113  {
   114    /* Rule #1: If both classes are equal, this is the resulting class.  */
   115    if (class1 == class2)
   116      return class1;
   117  
   118    /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   119       the other class.  */
   120    if (class1 == X86_64_NO_CLASS)
   121      return class2;
   122    if (class2 == X86_64_NO_CLASS)
   123      return class1;
   124  
   125    /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
   126    if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
   127      return X86_64_MEMORY_CLASS;
   128  
   129    /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
   130    if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
   131        || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
   132      return X86_64_INTEGERSI_CLASS;
   133    if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
   134        || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
   135      return X86_64_INTEGER_CLASS;
   136  
   137    /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
   138       MEMORY is used.  */
   139    if (class1 == X86_64_X87_CLASS
   140        || class1 == X86_64_X87UP_CLASS
   141        || class1 == X86_64_COMPLEX_X87_CLASS
   142        || class2 == X86_64_X87_CLASS
   143        || class2 == X86_64_X87UP_CLASS
   144        || class2 == X86_64_COMPLEX_X87_CLASS)
   145      return X86_64_MEMORY_CLASS;
   146  
   147    /* Rule #6: Otherwise class SSE is used.  */
   148    return X86_64_SSE_CLASS;
   149  }
   150  
   151  /* Classify the argument of type TYPE and mode MODE.
   152     CLASSES will be filled by the register class used to pass each word
   153     of the operand.  The number of words is returned.  In case the parameter
   154     should be passed in memory, 0 is returned. As a special case for zero
   155     sized containers, classes[0] will be NO_CLASS and 1 is returned.
   156  
   157     See the x86-64 PS ABI for details.
   158  */
   159  static size_t
   160  classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
   161  		   size_t byte_offset)
   162  {
   163    switch (type->type)
   164      {
   165      case FFI_TYPE_UINT8:
   166      case FFI_TYPE_SINT8:
   167      case FFI_TYPE_UINT16:
   168      case FFI_TYPE_SINT16:
   169      case FFI_TYPE_UINT32:
   170      case FFI_TYPE_SINT32:
   171      case FFI_TYPE_UINT64:
   172      case FFI_TYPE_SINT64:
   173      case FFI_TYPE_POINTER:
   174      do_integer:
   175        {
   176  	size_t size = byte_offset + type->size;
   177  
   178  	if (size <= 4)
   179  	  {
   180  	    classes[0] = X86_64_INTEGERSI_CLASS;
   181  	    return 1;
   182  	  }
   183  	else if (size <= 8)
   184  	  {
   185  	    classes[0] = X86_64_INTEGER_CLASS;
   186  	    return 1;
   187  	  }
   188  	else if (size <= 12)
   189  	  {
   190  	    classes[0] = X86_64_INTEGER_CLASS;
   191  	    classes[1] = X86_64_INTEGERSI_CLASS;
   192  	    return 2;
   193  	  }
   194  	else if (size <= 16)
   195  	  {
   196  	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
   197  	    return 2;
   198  	  }
   199  	else
   200  	  FFI_ASSERT (0);
   201        }
   202      case FFI_TYPE_FLOAT:
   203        if (!(byte_offset % 8))
   204  	classes[0] = X86_64_SSESF_CLASS;
   205        else
   206  	classes[0] = X86_64_SSE_CLASS;
   207        return 1;
   208      case FFI_TYPE_DOUBLE:
   209        classes[0] = X86_64_SSEDF_CLASS;
   210        return 1;
   211  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
   212      case FFI_TYPE_LONGDOUBLE:
   213        classes[0] = X86_64_X87_CLASS;
   214        classes[1] = X86_64_X87UP_CLASS;
   215        return 2;
   216  #endif
   217      case FFI_TYPE_STRUCT:
   218        {
   219  	const size_t UNITS_PER_WORD = 8;
   220  	size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
   221  	ffi_type **ptr;
   222  	int i;
   223  	enum x86_64_reg_class subclasses[MAX_CLASSES];
   224  
   225  	/* If the struct is larger than 32 bytes, pass it on the stack.  */
   226  	if (type->size > 32)
   227  	  return 0;
   228  
   229  	for (i = 0; i < words; i++)
   230  	  classes[i] = X86_64_NO_CLASS;
   231  
   232  	/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
   233  	   signalize memory class, so handle it as special case.  */
   234  	if (!words)
   235  	  {
   236      case FFI_TYPE_VOID:
   237  	    classes[0] = X86_64_NO_CLASS;
   238  	    return 1;
   239  	  }
   240  
   241  	/* Merge the fields of structure.  */
   242  	for (ptr = type->elements; *ptr != NULL; ptr++)
   243  	  {
   244  	    size_t num;
   245  
   246  	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
   247  
   248  	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
   249  	    if (num == 0)
   250  	      return 0;
   251  	    for (i = 0; i < num; i++)
   252  	      {
   253  		size_t pos = byte_offset / 8;
   254  		classes[i + pos] =
   255  		  merge_classes (subclasses[i], classes[i + pos]);
   256  	      }
   257  
   258  	    byte_offset += (*ptr)->size;
   259  	  }
   260  
   261  	if (words > 2)
   262  	  {
   263  	    /* When size > 16 bytes, if the first one isn't
   264  	       X86_64_SSE_CLASS or any other ones aren't
   265  	       X86_64_SSEUP_CLASS, everything should be passed in
   266  	       memory.  */
   267  	    if (classes[0] != X86_64_SSE_CLASS)
   268  	      return 0;
   269  
   270  	    for (i = 1; i < words; i++)
   271  	      if (classes[i] != X86_64_SSEUP_CLASS)
   272  		return 0;
   273  	  }
   274  
   275  	/* Final merger cleanup.  */
   276  	for (i = 0; i < words; i++)
   277  	  {
   278  	    /* If one class is MEMORY, everything should be passed in
   279  	       memory.  */
   280  	    if (classes[i] == X86_64_MEMORY_CLASS)
   281  	      return 0;
   282  
   283  	    /* The X86_64_SSEUP_CLASS should be always preceded by
   284  	       X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
   285  	    if (classes[i] == X86_64_SSEUP_CLASS
   286  		&& classes[i - 1] != X86_64_SSE_CLASS
   287  		&& classes[i - 1] != X86_64_SSEUP_CLASS)
   288  	      {
   289  		/* The first one should never be X86_64_SSEUP_CLASS.  */
   290  		FFI_ASSERT (i != 0);
   291  		classes[i] = X86_64_SSE_CLASS;
   292  	      }
   293  
   294  	    /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
   295  		everything should be passed in memory.  */
   296  	    if (classes[i] == X86_64_X87UP_CLASS
   297  		&& (classes[i - 1] != X86_64_X87_CLASS))
   298  	      {
   299  		/* The first one should never be X86_64_X87UP_CLASS.  */
   300  		FFI_ASSERT (i != 0);
   301  		return 0;
   302  	      }
   303  	  }
   304  	return words;
   305        }
   306      case FFI_TYPE_COMPLEX:
   307        {
   308  	ffi_type *inner = type->elements[0];
   309  	switch (inner->type)
   310  	  {
   311  	  case FFI_TYPE_INT:
   312  	  case FFI_TYPE_UINT8:
   313  	  case FFI_TYPE_SINT8:
   314  	  case FFI_TYPE_UINT16:
   315  	  case FFI_TYPE_SINT16:
   316  	  case FFI_TYPE_UINT32:
   317  	  case FFI_TYPE_SINT32:
   318  	  case FFI_TYPE_UINT64:
   319  	  case FFI_TYPE_SINT64:
   320  	    goto do_integer;
   321  
   322  	  case FFI_TYPE_FLOAT:
   323  	    classes[0] = X86_64_SSE_CLASS;
   324  	    if (byte_offset % 8)
   325  	      {
   326  		classes[1] = X86_64_SSESF_CLASS;
   327  		return 2;
   328  	      }
   329  	    return 1;
   330  	  case FFI_TYPE_DOUBLE:
   331  	    classes[0] = classes[1] = X86_64_SSEDF_CLASS;
   332  	    return 2;
   333  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
   334  	  case FFI_TYPE_LONGDOUBLE:
   335  	    classes[0] = X86_64_COMPLEX_X87_CLASS;
   336  	    return 1;
   337  #endif
   338  	  }
   339        }
   340      }
   341    abort();
   342  }
   343  
   344  /* Examine the argument and return set number of register required in each
   345     class.  Return zero iff parameter should be passed in memory, otherwise
   346     the number of registers.  */
   347  
   348  static size_t
   349  examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
   350  		  _Bool in_return, int *pngpr, int *pnsse)
   351  {
   352    size_t n;
   353    int i, ngpr, nsse;
   354  
   355    n = classify_argument (type, classes, 0);
   356    if (n == 0)
   357      return 0;
   358  
   359    ngpr = nsse = 0;
   360    for (i = 0; i < n; ++i)
   361      switch (classes[i])
   362        {
   363        case X86_64_INTEGER_CLASS:
   364        case X86_64_INTEGERSI_CLASS:
   365  	ngpr++;
   366  	break;
   367        case X86_64_SSE_CLASS:
   368        case X86_64_SSESF_CLASS:
   369        case X86_64_SSEDF_CLASS:
   370  	nsse++;
   371  	break;
   372        case X86_64_NO_CLASS:
   373        case X86_64_SSEUP_CLASS:
   374  	break;
   375        case X86_64_X87_CLASS:
   376        case X86_64_X87UP_CLASS:
   377        case X86_64_COMPLEX_X87_CLASS:
   378  	return in_return != 0;
   379        default:
   380  	abort ();
   381        }
   382  
   383    *pngpr = ngpr;
   384    *pnsse = nsse;
   385  
   386    return n;
   387  }
   388  
   389  /* Perform machine dependent cif processing.  */
   390  
   391  ffi_status
   392  ffi_prep_cif_machdep (ffi_cif *cif)
   393  {
   394    int gprcount, ssecount, i, avn, ngpr, nsse, flags;
   395    enum x86_64_reg_class classes[MAX_CLASSES];
   396    size_t bytes, n, rtype_size;
   397    ffi_type *rtype;
   398  
   399    if (cif->abi != FFI_UNIX64)
   400      return FFI_BAD_ABI;
   401  
   402    gprcount = ssecount = 0;
   403  
   404    rtype = cif->rtype;
   405    rtype_size = rtype->size;
   406    switch (rtype->type)
   407      {
   408      case FFI_TYPE_VOID:
   409        flags = UNIX64_RET_VOID;
   410        break;
   411      case FFI_TYPE_UINT8:
   412        flags = UNIX64_RET_UINT8;
   413        break;
   414      case FFI_TYPE_SINT8:
   415        flags = UNIX64_RET_SINT8;
   416        break;
   417      case FFI_TYPE_UINT16:
   418        flags = UNIX64_RET_UINT16;
   419        break;
   420      case FFI_TYPE_SINT16:
   421        flags = UNIX64_RET_SINT16;
   422        break;
   423      case FFI_TYPE_UINT32:
   424        flags = UNIX64_RET_UINT32;
   425        break;
   426      case FFI_TYPE_INT:
   427      case FFI_TYPE_SINT32:
   428        flags = UNIX64_RET_SINT32;
   429        break;
   430      case FFI_TYPE_UINT64:
   431      case FFI_TYPE_SINT64:
   432        flags = UNIX64_RET_INT64;
   433        break;
   434      case FFI_TYPE_POINTER:
   435        flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
   436        break;
   437      case FFI_TYPE_FLOAT:
   438        flags = UNIX64_RET_XMM32;
   439        break;
   440      case FFI_TYPE_DOUBLE:
   441        flags = UNIX64_RET_XMM64;
   442        break;
   443      case FFI_TYPE_LONGDOUBLE:
   444        flags = UNIX64_RET_X87;
   445        break;
   446      case FFI_TYPE_STRUCT:
   447        n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
   448        if (n == 0)
   449  	{
   450  	  /* The return value is passed in memory.  A pointer to that
   451  	     memory is the first argument.  Allocate a register for it.  */
   452  	  gprcount++;
   453  	  /* We don't have to do anything in asm for the return.  */
   454  	  flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
   455  	}
   456        else
   457  	{
   458  	  _Bool sse0 = SSE_CLASS_P (classes[0]);
   459  
   460  	  if (rtype_size == 4 && sse0)
   461  	    flags = UNIX64_RET_XMM32;
   462  	  else if (rtype_size == 8)
   463  	    flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
   464  	  else
   465  	    {
   466  	      _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
   467  	      if (sse0 && sse1)
   468  		flags = UNIX64_RET_ST_XMM0_XMM1;
   469  	      else if (sse0)
   470  		flags = UNIX64_RET_ST_XMM0_RAX;
   471  	      else if (sse1)
   472  		flags = UNIX64_RET_ST_RAX_XMM0;
   473  	      else
   474  		flags = UNIX64_RET_ST_RAX_RDX;
   475  	      flags |= rtype_size << UNIX64_SIZE_SHIFT;
   476  	    }
   477  	}
   478        break;
   479      case FFI_TYPE_COMPLEX:
   480        switch (rtype->elements[0]->type)
   481  	{
   482  	case FFI_TYPE_UINT8:
   483  	case FFI_TYPE_SINT8:
   484  	case FFI_TYPE_UINT16:
   485  	case FFI_TYPE_SINT16:
   486  	case FFI_TYPE_INT:
   487  	case FFI_TYPE_UINT32:
   488  	case FFI_TYPE_SINT32:
   489  	case FFI_TYPE_UINT64:
   490  	case FFI_TYPE_SINT64:
   491  	  flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
   492  	  break;
   493  	case FFI_TYPE_FLOAT:
   494  	  flags = UNIX64_RET_XMM64;
   495  	  break;
   496  	case FFI_TYPE_DOUBLE:
   497  	  flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
   498  	  break;
   499  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
   500  	case FFI_TYPE_LONGDOUBLE:
   501  	  flags = UNIX64_RET_X87_2;
   502  	  break;
   503  #endif
   504  	default:
   505  	  return FFI_BAD_TYPEDEF;
   506  	}
   507        break;
   508      default:
   509        return FFI_BAD_TYPEDEF;
   510      }
   511  
   512    /* Go over all arguments and determine the way they should be passed.
   513       If it's in a register and there is space for it, let that be so. If
   514       not, add it's size to the stack byte count.  */
   515    for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
   516      {
   517        if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
   518  	  || gprcount + ngpr > MAX_GPR_REGS
   519  	  || ssecount + nsse > MAX_SSE_REGS)
   520  	{
   521  	  long align = cif->arg_types[i]->alignment;
   522  
   523  	  if (align < 8)
   524  	    align = 8;
   525  
   526  	  bytes = ALIGN (bytes, align);
   527  	  bytes += cif->arg_types[i]->size;
   528  	}
   529        else
   530  	{
   531  	  gprcount += ngpr;
   532  	  ssecount += nsse;
   533  	}
   534      }
   535    if (ssecount)
   536      flags |= UNIX64_FLAG_XMM_ARGS;
   537  
   538    cif->flags = flags;
   539    cif->bytes = ALIGN (bytes, 8);
   540  
   541    return FFI_OK;
   542  }
   543  
   544  static void
   545  ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
   546  	      void **avalue, void *closure)
   547  {
   548    enum x86_64_reg_class classes[MAX_CLASSES];
   549    char *stack, *argp;
   550    ffi_type **arg_types;
   551    int gprcount, ssecount, ngpr, nsse, i, avn, flags;
   552    struct register_args *reg_args;
   553  
   554    /* Can't call 32-bit mode from 64-bit mode.  */
   555    FFI_ASSERT (cif->abi == FFI_UNIX64);
   556  
   557    /* If the return value is a struct and we don't have a return value
   558       address then we need to make one.  Otherwise we can ignore it.  */
   559    flags = cif->flags;
   560    if (rvalue == NULL)
   561      {
   562        if (flags & UNIX64_FLAG_RET_IN_MEM)
   563  	rvalue = alloca (cif->rtype->size);
   564        else
   565  	flags = UNIX64_RET_VOID;
   566      }
   567  
   568    /* Allocate the space for the arguments, plus 4 words of temp space.  */
   569    stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
   570    reg_args = (struct register_args *) stack;
   571    argp = stack + sizeof (struct register_args);
   572  
   573    reg_args->r10 = (uintptr_t) closure;
   574  
   575    gprcount = ssecount = 0;
   576  
   577    /* If the return value is passed in memory, add the pointer as the
   578       first integer argument.  */
   579    if (flags & UNIX64_FLAG_RET_IN_MEM)
   580      reg_args->gpr[gprcount++] = (unsigned long) rvalue;
   581  
   582    avn = cif->nargs;
   583    arg_types = cif->arg_types;
   584  
   585    for (i = 0; i < avn; ++i)
   586      {
   587        size_t n, size = arg_types[i]->size;
   588  
   589        n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
   590        if (n == 0
   591  	  || gprcount + ngpr > MAX_GPR_REGS
   592  	  || ssecount + nsse > MAX_SSE_REGS)
   593  	{
   594  	  long align = arg_types[i]->alignment;
   595  
   596  	  /* Stack arguments are *always* at least 8 byte aligned.  */
   597  	  if (align < 8)
   598  	    align = 8;
   599  
   600  	  /* Pass this argument in memory.  */
   601  	  argp = (void *) ALIGN (argp, align);
   602  	  memcpy (argp, avalue[i], size);
   603  	  argp += size;
   604  	}
   605        else
   606  	{
   607  	  /* The argument is passed entirely in registers.  */
   608  	  char *a = (char *) avalue[i];
   609  	  int j;
   610  
   611  	  for (j = 0; j < n; j++, a += 8, size -= 8)
   612  	    {
   613  	      switch (classes[j])
   614  		{
   615  		case X86_64_NO_CLASS:
   616  		case X86_64_SSEUP_CLASS:
   617  		  break;
   618  		case X86_64_INTEGER_CLASS:
   619  		case X86_64_INTEGERSI_CLASS:
   620  		  /* Sign-extend integer arguments passed in general
   621  		     purpose registers, to cope with the fact that
   622  		     LLVM incorrectly assumes that this will be done
   623  		     (the x86-64 PS ABI does not specify this). */
   624  		  switch (arg_types[i]->type)
   625  		    {
   626  		    case FFI_TYPE_SINT8:
   627  		      reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
   628  		      break;
   629  		    case FFI_TYPE_SINT16:
   630  		      reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
   631  		      break;
   632  		    case FFI_TYPE_SINT32:
   633  		      reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
   634  		      break;
   635  		    default:
   636  		      reg_args->gpr[gprcount] = 0;
   637  		      memcpy (&reg_args->gpr[gprcount], a, size);
   638  		    }
   639  		  gprcount++;
   640  		  break;
   641  		case X86_64_SSE_CLASS:
   642  		case X86_64_SSEDF_CLASS:
   643  		  reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
   644  		  break;
   645  		case X86_64_SSESF_CLASS:
   646  		  reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
   647  		  break;
   648  		default:
   649  		  abort();
   650  		}
   651  	    }
   652  	}
   653      }
   654    reg_args->rax = ssecount;
   655  
   656    ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
   657  		   flags, rvalue, fn);
   658  }
   659  
   660  void
   661  ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
   662  {
   663    ffi_call_int (cif, fn, rvalue, avalue, NULL);
   664  }
   665  
   666  void
   667  ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
   668  	     void **avalue, void *closure)
   669  {
   670    ffi_call_int (cif, fn, rvalue, avalue, closure);
   671  }
   672  
   673  extern void ffi_closure_unix64(void) FFI_HIDDEN;
   674  extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
   675  
   676  ffi_status
   677  ffi_prep_closure_loc (ffi_closure* closure,
   678  		      ffi_cif* cif,
   679  		      void (*fun)(ffi_cif*, void*, void**, void*),
   680  		      void *user_data,
   681  		      void *codeloc)
   682  {
   683    static const unsigned char trampoline[16] = {
   684      /* leaq  -0x7(%rip),%r10   # 0x0  */
   685      0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
   686      /* jmpq  *0x3(%rip)        # 0x10 */
   687      0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
   688      /* nopl  (%rax) */
   689      0x0f, 0x1f, 0x00
   690    };
   691    void (*dest)(void);
   692    char *tramp = closure->tramp;
   693  
   694    if (cif->abi != FFI_UNIX64)
   695      return FFI_BAD_ABI;
   696  
   697    if (cif->flags & UNIX64_FLAG_XMM_ARGS)
   698      dest = ffi_closure_unix64_sse;
   699    else
   700      dest = ffi_closure_unix64;
   701  
   702    memcpy (tramp, trampoline, sizeof(trampoline));
   703    *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
   704  
   705    closure->cif = cif;
   706    closure->fun = fun;
   707    closure->user_data = user_data;
   708  
   709    return FFI_OK;
   710  }
   711  
   712  int FFI_HIDDEN
   713  ffi_closure_unix64_inner(ffi_cif *cif,
   714  			 void (*fun)(ffi_cif*, void*, void**, void*),
   715  			 void *user_data,
   716  			 void *rvalue,
   717  			 struct register_args *reg_args,
   718  			 char *argp)
   719  {
   720    void **avalue;
   721    ffi_type **arg_types;
   722    long i, avn;
   723    int gprcount, ssecount, ngpr, nsse;
   724    int flags;
   725  
   726    avn = cif->nargs;
   727    flags = cif->flags;
   728    avalue = alloca(avn * sizeof(void *));
   729    gprcount = ssecount = 0;
   730  
   731    if (flags & UNIX64_FLAG_RET_IN_MEM)
   732      {
   733        /* On return, %rax will contain the address that was passed
   734  	 by the caller in %rdi.  */
   735        void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++];
   736        *(void **)rvalue = r;
   737        rvalue = r;
   738        flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
   739      }
   740  
   741    arg_types = cif->arg_types;
   742    for (i = 0; i < avn; ++i)
   743      {
   744        enum x86_64_reg_class classes[MAX_CLASSES];
   745        size_t n;
   746  
   747        n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
   748        if (n == 0
   749  	  || gprcount + ngpr > MAX_GPR_REGS
   750  	  || ssecount + nsse > MAX_SSE_REGS)
   751  	{
   752  	  long align = arg_types[i]->alignment;
   753  
   754  	  /* Stack arguments are *always* at least 8 byte aligned.  */
   755  	  if (align < 8)
   756  	    align = 8;
   757  
   758  	  /* Pass this argument in memory.  */
   759  	  argp = (void *) ALIGN (argp, align);
   760  	  avalue[i] = argp;
   761  	  argp += arg_types[i]->size;
   762  	}
   763        /* If the argument is in a single register, or two consecutive
   764  	 integer registers, then we can use that address directly.  */
   765        else if (n == 1
   766  	       || (n == 2 && !(SSE_CLASS_P (classes[0])
   767  			       || SSE_CLASS_P (classes[1]))))
   768  	{
   769  	  /* The argument is in a single register.  */
   770  	  if (SSE_CLASS_P (classes[0]))
   771  	    {
   772  	      avalue[i] = &reg_args->sse[ssecount];
   773  	      ssecount += n;
   774  	    }
   775  	  else
   776  	    {
   777  	      avalue[i] = &reg_args->gpr[gprcount];
   778  	      gprcount += n;
   779  	    }
   780  	}
   781        /* Otherwise, allocate space to make them consecutive.  */
   782        else
   783  	{
   784  	  char *a = alloca (16);
   785  	  int j;
   786  
   787  	  avalue[i] = a;
   788  	  for (j = 0; j < n; j++, a += 8)
   789  	    {
   790  	      if (SSE_CLASS_P (classes[j]))
   791  		memcpy (a, &reg_args->sse[ssecount++], 8);
   792  	      else
   793  		memcpy (a, &reg_args->gpr[gprcount++], 8);
   794  	    }
   795  	}
   796      }
   797  
   798    /* Invoke the closure.  */
   799    fun (cif, rvalue, avalue, user_data);
   800  
   801    /* Tell assembly how to perform return type promotions.  */
   802    return flags;
   803  }
   804  
   805  extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
   806  extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
   807  
   808  ffi_status
   809  ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
   810  		     void (*fun)(ffi_cif*, void*, void**, void*))
   811  {
   812    if (cif->abi != FFI_UNIX64)
   813      return FFI_BAD_ABI;
   814  
   815    closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
   816  		    ? ffi_go_closure_unix64_sse
   817  		    : ffi_go_closure_unix64);
   818    closure->cif = cif;
   819    closure->fun = fun;
   820  
   821    return FFI_OK;
   822  }
   823  
   824  #endif /* __x86_64__ */