github.com/notti/go-dynamic@v0.0.0-20190619201224-fc443047424c/steps/3_goffi/ffi/call_linux_amd64.s (about)

     1  #include "textflag.h"
     2  #include "ffi.h"
     3  
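// The Go runtime gets these offsets from its generated "go_asm.h" header.
// That header is not included here, so the needed defines are faked below;
// they have to agree with the layout the runtime actually uses.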
     6  #define g_stack 0
     7  #define stack_lo 0
     8  #define slice_array 0
     9  #define slice_len 8
    10  #define slice_cap 16
    11  
    12  
    13  /*
    Argument type codes and the instruction used to load each one:
    15      int:
    16      type64:     movq              64 bit
    17      typeS32:    movlqsx    signed 32 bit
    18      typeU32:    movlqzx  unsigned 32 bit
    19      typeS16:    movwqsx    signed 16 bit
    20      typeU16:    movwqzx  unsigned 16 bit
    21      typeS8:     movbqsx    signed 8  bit
    22      typeU8:     movbqzx  unsigned 8  bit
    23  
    24      float:
    25      typeDouble: movsd             64 bit
    26      typeFloat:  movss             32 bit
    27  */
    28  
    29  #define LOADREG(off, target) \
    30      MOVLQSX Spec_intargs+argument__size*off(R12), AX \
    31      TESTQ AX, AX \
    32      JS xmm \
    33      MOVWQZX AX, R11 \
    34      SHRL $16, AX \
    35      ADDQ R13, R11 \
    36      CMPB AX, $const_type64 \
    37      JNE 3(PC) \
    38      MOVQ 0(R11), target \ // 64bit
    39      JMP 20(PC) \
    40      CMPB AX, $const_typeS32 \
    41      JNE 3(PC) \
    42      MOVLQSX 0(R11), target \ // signed 32 bit
    43      JMP 18(PC) \
    44      CMPB AX, $const_typeU32 \
    45      JNE 3(PC) \
    46      MOVLQZX 0(R11), target \ // unsigned 32 bit
    47      JMP 14(PC) \
    48      CMPB AX, $const_typeS16 \
    49      JNE 3(PC) \
    50      MOVWQSX 0(R11), target \ // signed 16 bit
    51      JMP 10(PC) \
    52      CMPB AX, $const_typeU16 \
    53      JNE 3(PC) \
    54      MOVWQZX 0(R11), target \ // unsigned 16 bit
    55      JMP 6(PC) \
    56      CMPB AX, $const_typeS8 \
    57      JNE 3(PC) \
    58      MOVBQSX 0(R11), target \ // signed 8 bit
    59      JMP 2(PC) \
    60      MOVBQZX 0(R11), target // unsigned 8 bit
    61  
    62  #define LOADXMMREG(off, target) \
    63      MOVLQSX Spec_xmmargs+argument__size*off(R12), AX \
    64      TESTQ AX, AX \
    65      JS prepared \
    66      MOVWQZX AX, R11 \
    67      SHRL $16, AX \
    68      ADDQ R13, R11 \
    69      CMPB AX, $const_typeDouble \
    70      JNE 3(PC) \
    71      MOVSD 0(R11), target \ // float 64bit
    72      JMP 2(PC) \
    73      MOVSS 0(R11), target \ // float 32bit
    74  
    75  
    76  // func asmcall()
    77  TEXT ·asmcall(SB),NOSPLIT,$0
    78      MOVQ DI, R12      // FRAME (preserved)
    79      MOVQ Spec_base(R12), R13  // base
    80      MOVQ SP, R14 // stack
    81  
    82      ANDQ $~0x1F, SP // 32 byte alignment for cdecl (in case someone wants to pass __m256 on the stack)
    83      // for no __m256 16 byte would be ok
    84      // this is actually already done by cgocall - but asmcall was called from there and destroys that :(
    85  
    86      MOVQ Spec_stack+slice_len(R12), AX // length of stack registers
    87      TESTQ AX, AX
    88      JZ reg
    89  
    90      // Fix alignment depending on number of arguments
    91      MOVQ AX, BX
    92      ANDQ $3, BX
    93      SHLQ $3, BX
    94      SUBQ BX, SP
    95  
    96      MOVQ Spec_stack+slice_array(R12), BX
    97  
    98  next:
    99      DECQ AX
   100      MOVQ 0(BX)(AX*argument__size), CX
   101      //check type and push to stack
   102      MOVWQZX CX, R11
   103      SHRL $16, CX
   104      ADDQ R13, R11
   105  
   106      CMPB CX, $const_type64
   107      JNE 7(PC)
   108      SUBQ $8, SP
   109      MOVQ 0(R11), CX
   110      MOVQ CX, 0(SP)
   111      TESTQ AX, AX
   112      JZ reg
   113      JMP next
   114  
   115      CMPB CX, $const_typeS32
   116      JNE 7(PC)
   117      SUBQ $8, SP
   118      MOVLQSX 0(R11), CX
   119      MOVQ CX, 0(SP)
   120      TESTQ AX, AX
   121      JZ reg
   122      JMP next
   123      
   124      CMPB CX, $const_typeU32
   125      JNE 7(PC)
   126      SUBQ $8, SP
   127      MOVLQZX 0(R11), CX
   128      MOVQ CX, 0(SP)
   129      TESTQ AX, AX
   130      JZ reg
   131      JMP next
   132      
   133      CMPB CX, $const_typeS16
   134      JNE 7(PC)
   135      SUBQ $8, SP
   136      MOVWQSX 0(R11), CX
   137      MOVQ CX, 0(SP)
   138      TESTQ AX, AX
   139      JZ reg
   140      JMP next
   141      
   142      CMPB CX, $const_typeU16
   143      JNE 7(PC)
   144      SUBQ $8, SP
   145      MOVWQZX 0(R11), CX
   146      MOVQ CX, 0(SP)
   147      TESTQ AX, AX
   148      JZ reg
   149      JMP next
   150      
   151      CMPB CX, $const_typeS8
   152      JNE 7(PC)
   153      SUBQ $8, SP
   154      MOVBQSX 0(R11), CX
   155      MOVQ CX, 0(SP)
   156      TESTQ AX, AX
   157      JZ reg
   158      JMP next
   159      
   160      CMPB CX, $const_typeU8
   161      JNE 7(PC)
   162      SUBQ $8, SP
   163      MOVBQZX 0(R11), CX
   164      MOVQ CX, 0(SP)
   165      TESTQ AX, AX
   166      JZ reg
   167      JMP next
   168  
   169      CMPB CX, $const_typeU8
   170      JNE 7(PC)
   171      SUBQ $8, SP
   172      MOVBQZX 0(R11), CX
   173      MOVQ CX, 0(SP)
   174      TESTQ AX, AX
   175      JZ reg
   176      JMP next
   177  
   178      CMPB CX, $const_typeDouble
   179      JNE 7(PC)
   180      SUBQ $8, SP
   181      MOVSD 0(R11), X0
   182      MOVSD X0, 0(SP)
   183      TESTQ AX, AX
   184      JZ reg
   185      JMP next
   186  
   187      SUBQ $8, SP
   188      MOVSS 0(R11), X0
   189      MOVSS X0, 0(SP)
   190      TESTQ AX, AX
   191      JZ reg
   192      JMP next
   193  
   194  reg:
   195      // load register arguments
   196      LOADREG(0, DI)
   197      LOADREG(1, SI)
   198      LOADREG(2, DX)
   199      LOADREG(3, CX)
   200      LOADREG(4, R8)
   201      LOADREG(5, R9)
   202  
   203  xmm:
   204      // load xmm arguments
   205      LOADXMMREG(0, X0)
   206      LOADXMMREG(1, X1)
   207      LOADXMMREG(2, X2)
   208      LOADXMMREG(3, X3)
   209      LOADXMMREG(4, X4)
   210      LOADXMMREG(5, X5)
   211      LOADXMMREG(6, X6)
   212      LOADXMMREG(7, X7)
   213  
   214  prepared:
   215      // load number of vector registers
   216      MOVBQZX Spec_rax(R12), AX
   217  
   218      // do the actuall call
   219      CALL (R12)
   220  
   221      MOVQ R14, SP
   222  
   223      // store ret0
   224      MOVLQSX Spec_ret0(R12), BX
   225      TESTQ BX, BX
   226      JS xmmret0
   227      MOVWQZX BX, R11
   228      SHRL $16, BX
   229      ADDQ R13, R11
   230      CMPB BX, $0
   231      JNE 3(PC)
   232      MOVQ AX, (R11)
   233      JMP ret1
   234      CMPB BX, $2
   235      JGT 3(PC)
   236      MOVL AX, (R11)
   237      JMP ret1
   238      CMPB BX, $4
   239      JGT 3(PC)
   240      MOVW AX, (R11)
   241      JMP ret1
   242      MOVB AX, (R11)
   243  
   244  ret1:
   245      // store ret1
   246      MOVLQSX Spec_ret1(R12), BX
   247      TESTQ BX, BX
   248      JS DONE
   249      MOVWQZX BX, R11
   250      SHRL $16, BX
   251      ADDQ R13, R11
   252      CMPB BX, $0
   253      JNE 3(PC)
   254      MOVQ DX, (R11)
   255      JMP ret1
   256      CMPB BX, $2
   257      JGT 3(PC)
   258      MOVL DX, (R11)
   259      JMP ret1
   260      CMPB BX, $4
   261      JGT 3(PC)
   262      MOVW DX, (R11)
   263      JMP ret1
   264      MOVB DX, (R11)
   265  
   266  xmmret0:
   267      // store xmmret0
   268      MOVLQSX Spec_xmmret0(R12), BX
   269      TESTQ BX, BX
   270      JS DONE
   271      MOVWQZX BX, R11
   272      SHRL $16, BX
   273      ADDQ R13, R11
   274      CMPB BX, $7
   275      JNE 3(PC)
   276      MOVSD X0, (R11)
   277      JMP xmmret1
   278      MOVSS X0, (R11)
   279  
   280  xmmret1:
   281      // store xmmret1
   282      MOVLQSX Spec_xmmret1(R12), BX
   283      TESTQ BX, BX
   284      JS DONE
   285      MOVWQZX BX, R11
   286      SHRL $16, BX
   287      ADDQ R13, R11
   288      CMPB BX, $7
   289      JNE 3(PC)
   290      MOVSD X1, (R11)
   291      JMP xmmret1
   292      MOVSS X1, (R11)
   293  
   294  DONE:
   295      RET
   296  
   297  
   298  GLOBL pthread_attr_init__dynload(SB), NOPTR, $8
   299  GLOBL pthread_attr_getstacksize__dynload(SB), NOPTR, $8
   300  GLOBL pthread_attr_destroy__dynload(SB), NOPTR, $8
   301  
   302  TEXT x_cgo_init(SB),NOSPLIT,$512 // size_t size (8 byte) + unknown pthread_attr_t - hopefully this is big enough
   303      MOVQ DI, R12 // g
   304  
   305      // pthread_attr_init(8(SP))
   306      LEAQ 8(SP), DI
   307      MOVQ $pthread_attr_init__dynload(SB), R11
   308      CALL (R11)
   309  
   310      // pthread_attr_init(8(SP), 0(SP))
   311      LEAQ 8(SP), DI
   312      LEAQ 0(SP), SI
   313      MOVQ $pthread_attr_getstacksize__dynload(SB), R11
   314      CALL (R11)
   315  
   316      // g->stacklo = &size - size + 4096
   317      LEAQ 0x1000(SP), AX
   318      SUBQ 0(SP), AX
   319      MOVQ AX, (g_stack+stack_lo)(R12)
   320  
   321      // pthread_attr_init(8(SP))
   322      LEAQ 8(SP), DI
   323      MOVQ $pthread_attr_destroy__dynload(SB), R11
   324      CALL (R11)
   325  
   326      RET