github.com/prattmic/llgo-embedded@v0.0.0-20150820070356-41cfecea0e1e/third_party/gofrontend/libffi/src/tile/tile.S (about)

     1  /* -----------------------------------------------------------------------
     2     tile.S - Copyright (c) 2011 Tilera Corp.
     3  
     4     Tilera TILEPro and TILE-Gx Foreign Function Interface
     5  
     6     Permission is hereby granted, free of charge, to any person obtaining
     7     a copy of this software and associated documentation files (the
     8     ``Software''), to deal in the Software without restriction, including
     9     without limitation the rights to use, copy, modify, merge, publish,
    10     distribute, sublicense, and/or sell copies of the Software, and to
    11     permit persons to whom the Software is furnished to do so, subject to
    12     the following conditions:
    13  
    14     The above copyright notice and this permission notice shall be included
    15     in all copies or substantial portions of the Software.
    16  
    17     THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
    18     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    20     NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    21     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    22     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    24     DEALINGS IN THE SOFTWARE.
    25     ----------------------------------------------------------------------- */
    26  
    27  #define LIBFFI_ASM
    28  #include <fficonfig.h>
    29  #include <ffi.h>
    30  
    31  /* Number of bytes in a register. */
    32  #define REG_SIZE FFI_SIZEOF_ARG
    33  
    34  /* Number of bytes in stack linkage area for backtracing.
    35  
    36     A note about the ABI: on entry to a procedure, sp points to a stack
    37     slot where it must spill the return address if it's not a leaf.
    38     REG_SIZE bytes beyond that is a slot owned by the caller which
    39     contains the sp value that the caller had when it was originally
    40     entered (i.e. the caller's frame pointer). */
    41  #define LINKAGE_SIZE (2 * REG_SIZE)
    42  
    43  /* The first 10 registers (r0-r9) are used to pass arguments and return values. */
    44  #define NUM_ARG_REGS 10
    45  
    46  #ifdef __tilegx__ /* TILE-Gx mnemonics for store, load, and branch-if-greater-than-zero. */
    47  #define SW st
    48  #define LW ld
    49  #define BGZT bgtzt
    50  #else /* TILEPro equivalents. */
    51  #define SW sw
    52  #define LW lw
    53  #define BGZT bgzt
    54  #endif /* __tilegx__ */
    55  
    56  
    57  /* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
    58                         const int_reg_t *stack_args,
    59                         unsigned long stack_args_bytes,
    60                         void (*fnaddr)(void));
    61  
    62          On entry, REG_ARGS contains the outgoing register values,
    63          and STACK_ARGS contains STACK_ARGS_BYTES bytes of additional values
    64          to be passed on the stack. If STACK_ARGS_BYTES is zero, then
    65          STACK_ARGS is ignored.
    66  
    67          When the invoked function returns, the values of r0-r9 are
    68          blindly stored back into REG_ARGS for the caller to examine. */
    69  
    70          .section .text.ffi_call_tile, "ax", @progbits
    71          .align  8
    72          .globl  ffi_call_tile
    73          FFI_HIDDEN(ffi_call_tile)
    74  ffi_call_tile:
    75  
    76  /* Incoming arguments. */
    77  #define REG_ARGS                r0
    78  #define INCOMING_STACK_ARGS     r1
    79  #define STACK_ARG_BYTES         r2
    80  #define ORIG_FNADDR             r3
    81  
    82  /* Temporary values. */
    83  #define FRAME_SIZE              r10
    84  #define TMP                     r11
    85  #define TMP2                    r12
    86  #define OUTGOING_STACK_ARGS     r13
    87  #define REG_ADDR_PTR            r14
    88  #define RETURN_REG_ADDR         r15
    89  #define FNADDR                  r16
    90  
    91          .cfi_startproc
    92          {
    93           /* Save return address. */
    94           SW     sp, lr
    95           .cfi_offset lr, 0
    96           /* Compute the address (sp - REG_SIZE) at which to spill incoming r52. */
    97           addi   TMP, sp, -REG_SIZE
    98           /* Frame size: stack args, plus spill slots for r52 and REG_ARGS, plus
    99              the linkage area.  The +7 is to round up mod 8. */
   100           addi   FRAME_SIZE, STACK_ARG_BYTES, \
   101                  REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
   102          }
   103          {
   104           /* Round stack frame size to a multiple of 8 to satisfy ABI. */
   105           andi   FRAME_SIZE, FRAME_SIZE, -8
   106           /* Compute where to spill REG_ARGS value. */
   107           addi   TMP2, sp, -(REG_SIZE * 2)
   108          }
   109          {
   110           /* Spill incoming r52. */
   111           SW     TMP, r52
   112           .cfi_offset r52, -REG_SIZE
   113           /* Set up our frame pointer. */
   114           move   r52, sp
   115           .cfi_def_cfa_register r52
   116           /* Push stack frame. */
   117           sub    sp, sp, FRAME_SIZE
   118          }
   119          {
   120           /* Prepare to set up stack linkage. */
   121           addi   TMP, sp, REG_SIZE
   122           /* Prepare to memcpy stack args. */
   123           addi   OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
   124           /* Save REG_ARGS which we will need after we call the subroutine. */
   125           SW     TMP2, REG_ARGS
   126          }
   127          {
   128           /* Set up linkage info to hold incoming stack pointer. */
   129           SW     TMP, r52
   130          }
   131          {
   132           /* Skip stack args memcpy if we don't have any stack args (common). */
   133           blezt  STACK_ARG_BYTES, .Ldone_stack_args_memcpy
   134          }
   135  
   136  .Lmemcpy_stack_args:
   137          {
   138           /* Load incoming argument from stack_args. */
   139           LW     TMP, INCOMING_STACK_ARGS
   140           addi   INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
   141          }
   142          {
   143           /* Store stack argument into outgoing stack argument area. */
   144           SW     OUTGOING_STACK_ARGS, TMP
   145           addi   OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
   146           addi   STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
   147          }
   148          {
   149           BGZT   STACK_ARG_BYTES, .Lmemcpy_stack_args /* Loop while bytes remain to copy. */
   150          }
   151  .Ldone_stack_args_memcpy:
   152  
   153          {
   154           /* Copy aside ORIG_FNADDR so we can overwrite its register. */
   155           move   FNADDR, ORIG_FNADDR
   156           /* Point REG_ADDR_PTR at REG_ARGS[1] to load the remaining argument registers. */
   157           addi   REG_ADDR_PTR, r0, REG_SIZE
   158           /* Load outgoing r0. */
   159           LW     r0, r0
   160          }
   161  
   162          /* Load up argument registers from the REG_ARGS array. */
   163  #define LOAD_REG(REG, PTR) \
   164          { \
   165           LW     REG, PTR ; \
   166           addi   PTR, PTR, REG_SIZE \
   167          }
   168  
   169          LOAD_REG(r1, REG_ADDR_PTR)
   170          LOAD_REG(r2, REG_ADDR_PTR)
   171          LOAD_REG(r3, REG_ADDR_PTR)
   172          LOAD_REG(r4, REG_ADDR_PTR)
   173          LOAD_REG(r5, REG_ADDR_PTR)
   174          LOAD_REG(r6, REG_ADDR_PTR)
   175          LOAD_REG(r7, REG_ADDR_PTR)
   176          LOAD_REG(r8, REG_ADDR_PTR)
   177          LOAD_REG(r9, REG_ADDR_PTR)
   178  
   179          {
   180           /* Call the subroutine. */
   181           jalr   FNADDR
   182          }
   183  
   184          {
   185           /* Restore original lr. */
   186           LW     lr, r52
   187           /* Prepare to recover REG_ARGS, which we spilled earlier. */
   188           addi   TMP, r52, -(2 * REG_SIZE)
   189          }
   190          {
   191           /* Reload REG_ARGS, so we can fill it in with the return regs r0-r9. */
   192           LW     RETURN_REG_ADDR, TMP
   193           /* Prepare to restore original r52. */
   194           addi   TMP, r52, -REG_SIZE
   195          }
   196  
   197          {
   198           /* Pop stack frame. */
   199           move   sp, r52
   200           /* Restore original r52. */
   201           LW     r52, TMP
   202          }
   203  
   204  #define STORE_REG(REG, PTR) \
   205          { \
   206           SW     PTR, REG ; \
   207           addi   PTR, PTR, REG_SIZE \
   208          }
   209  
   210          /* Return all register values by reference. */
   211          STORE_REG(r0, RETURN_REG_ADDR)
   212          STORE_REG(r1, RETURN_REG_ADDR)
   213          STORE_REG(r2, RETURN_REG_ADDR)
   214          STORE_REG(r3, RETURN_REG_ADDR)
   215          STORE_REG(r4, RETURN_REG_ADDR)
   216          STORE_REG(r5, RETURN_REG_ADDR)
   217          STORE_REG(r6, RETURN_REG_ADDR)
   218          STORE_REG(r7, RETURN_REG_ADDR)
   219          STORE_REG(r8, RETURN_REG_ADDR)
   220          STORE_REG(r9, RETURN_REG_ADDR)
   221  
   222          {
   223           jrp    lr
   224          }
   225  
   226          .cfi_endproc
   227          .size ffi_call_tile, .-ffi_call_tile
   228  
   229  /* ffi_closure_tile(...)
   230  
   231     On entry on TILEPro, lr points to the closure plus 8 bytes, and r10 contains the
   232     actual return address.  On TILE-Gx, r11 holds the closure pointer and lr the return address.
   233  
   234     This function simply dumps all register parameters into a stack array
   235     and passes the closure, the registers array, and the stack arguments
   236     to C code that does all of the actual closure processing. */
   237  
   238          .section .text.ffi_closure_tile, "ax", @progbits
   239          .align  8
   240          .globl  ffi_closure_tile
   241          FFI_HIDDEN(ffi_closure_tile)
   242  
   243          .cfi_startproc
   244  /* Room for two NUM_ARG_REGS-register arrays (spilled incoming values, then values reloaded on return), plus frame linkage. */
   245  #define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
   246  ffi_closure_tile:
   247          {
   248  #ifdef __tilegx__
   249           st     sp, lr /* Save return address. */
   250           .cfi_offset lr, 0
   251  #else
   252           /* Save return address (in r10 due to closure stub wrapper). */
   253           SW     sp, r10
   254           .cfi_return_column r10
   255           .cfi_offset r10, 0
   256  #endif
   257           /* Compute address for stack frame linkage. */
   258           addli   r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
   259          }
   260          {
   261           /* Save incoming stack pointer in linkage area. */
   262           SW     r10, sp
   263           .cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
   264           /* Push a new stack frame. */
   265           addli   sp, sp, -CLOSURE_FRAME_SIZE
   266           .cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
   267          }
   268  
   269          {
   270           /* Create pointer to where to start spilling registers. */
   271           addi   r10, sp, LINKAGE_SIZE
   272          }
   273  
   274          /* Spill all the incoming registers. */
   275          STORE_REG(r0, r10)
   276          STORE_REG(r1, r10)
   277          STORE_REG(r2, r10)
   278          STORE_REG(r3, r10)
   279          STORE_REG(r4, r10)
   280          STORE_REG(r5, r10)
   281          STORE_REG(r6, r10)
   282          STORE_REG(r7, r10)
   283          STORE_REG(r8, r10)
   284          {
   285           /* Save r9. */
   286           SW     r10, r9
   287  #ifdef __tilegx__
   288           /* Pointer to closure is passed in r11. */
   289           move  r0, r11
   290  #else
   291           /* Compute pointer to the closure object. Because the closure
   292              starts with a "jal ffi_closure_tile", we can just take the
   293              value of lr (a phony return address pointing into the closure)
   294              and subtract 8. */
   295           addi   r0, lr, -8
   296  #endif
   297           /* Compute a pointer to the register arguments we just spilled. */
   298           addi   r1, sp, LINKAGE_SIZE
   299          }
   300          {
   301           /* Compute a pointer to the extra stack arguments (if any). */
   302           addli   r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
   303           /* Call C code to deal with all of the grotty details. */
   304           jal    ffi_closure_tile_inner
   305          }
   306          {
   307           addli   r10, sp, CLOSURE_FRAME_SIZE /* r10 = incoming sp, where the return address was saved. */
   308          }
   309          {
   310           /* Restore the return address. */
   311           LW     lr, r10
   312           /* Compute pointer to the second registers array, just past the spilled inputs. */
   313           addli   r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
   314          }
   315          /* Return all the register values, which C code may have set. */
   316          LOAD_REG(r0, r10)
   317          LOAD_REG(r1, r10)
   318          LOAD_REG(r2, r10)
   319          LOAD_REG(r3, r10)
   320          LOAD_REG(r4, r10)
   321          LOAD_REG(r5, r10)
   322          LOAD_REG(r6, r10)
   323          LOAD_REG(r7, r10)
   324          LOAD_REG(r8, r10)
   325          LOAD_REG(r9, r10)
   326          {
   327           /* Pop the frame. */
   328           addli   sp, sp, CLOSURE_FRAME_SIZE
   329           jrp    lr
   330          }
   331  
   332          .cfi_endproc
   333          .size   ffi_closure_tile, . - ffi_closure_tile
   334  
   335  
   336  /* What follows are code template instructions that get copied to the
   337     closure trampoline by ffi_prep_closure_loc.  The zeroed operands
   338     get replaced by their proper values at runtime. */
   339  
   340          .section .text.ffi_template_tramp_tile, "ax", @progbits
   341          .align  8
   342          .globl  ffi_template_tramp_tile
   343          FFI_HIDDEN(ffi_template_tramp_tile)
   344  ffi_template_tramp_tile:
   345  #ifdef __tilegx__
   346          {
   347            moveli r11, 0 /* backpatched to address of containing closure. */
   348            moveli r10, 0 /* backpatched to ffi_closure_tile. */
   349          }
   350          /* Note: the following bundle, which shifts 16 more bits into r11 and r10,
   351             gets generated multiple times depending on the pointer value (esp. useful for -m32 mode). */
   352          { shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
   353          { info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
   354  #else /* TILEPro */
   355          /* 'jal .' yields a PC-relative offset of zero so we can OR in the right offset
   356             at runtime.  The move keeps the caller's return address in r10, as jal overwrites lr. */
   357          { move r10, lr ; jal . /* ffi_closure_tile */ }
   358  #endif /* __tilegx__ */
   359  
   360          .size   ffi_template_tramp_tile, . - ffi_template_tramp_tile