// Source: github.com/notti/go-dynamic@v0.0.0-20190619201224-fc443047424c/steps/3_goffi/ffi/call_linux_amd64.s
//
// Go (Plan 9 syntax) amd64 assembly. Builds a C call from a call
// specification ("Spec") and stores the integer / floating point results.
// Spec_*, argument__size and const_type* come from ffi.h.

#include "textflag.h"
#include "ffi.h"

// runtime has #include "go_asm.h"
// we need to fake the defines here:
#define g_stack 0
#define stack_lo 0
#define slice_array 0
#define slice_len 8
#define slice_cap 16


/*
	Argument descriptor (32 bit, as decoded below):
		negative     no (further) argument in this class
		bits  0..15  offset of the value, relative to Spec_base
		bits 16..    type selecting the load instruction

	int:
		type64:     movq      64 bit
		typeS32:    movlqsx   signed 32 bit
		typeU32:    movlqzx   unsigned 32 bit
		typeS16:    movwqsx   signed 16 bit
		typeU16:    movwqzx   unsigned 16 bit
		typeS8:     movbqsx   signed 8 bit
		typeU8:     movbqzx   unsigned 8 bit

	float:
		typeDouble: movsd     64 bit
		typeFloat:  movss     32 bit
*/

// LOADREG loads integer argument number off into register target.
//   In:       R12 = spec, R13 = base address of the argument values
//   Clobbers: AX, R11, flags
// Jumps to label xmm as soon as a negative descriptor is found.
// Every JMP n(PC) below leaves the macro (first instruction after it).
#define LOADREG(off, target) \
	MOVLQSX Spec_intargs+argument__size*off(R12), AX \
	TESTQ AX, AX \
	JS xmm \
	MOVWQZX AX, R11 \
	SHRL $16, AX \
	ADDQ R13, R11 \
	CMPB AX, $const_type64 \
	JNE 3(PC) \
	MOVQ 0(R11), target \ // 64bit
	JMP 22(PC) \
	CMPB AX, $const_typeS32 \
	JNE 3(PC) \
	MOVLQSX 0(R11), target \ // signed 32 bit
	JMP 18(PC) \
	CMPB AX, $const_typeU32 \
	JNE 3(PC) \
	MOVLQZX 0(R11), target \ // unsigned 32 bit
	JMP 14(PC) \
	CMPB AX, $const_typeS16 \
	JNE 3(PC) \
	MOVWQSX 0(R11), target \ // signed 16 bit
	JMP 10(PC) \
	CMPB AX, $const_typeU16 \
	JNE 3(PC) \
	MOVWQZX 0(R11), target \ // unsigned 16 bit
	JMP 6(PC) \
	CMPB AX, $const_typeS8 \
	JNE 3(PC) \
	MOVBQSX 0(R11), target \ // signed 8 bit
	JMP 2(PC) \
	MOVBQZX 0(R11), target // unsigned 8 bit

// LOADXMMREG loads floating point argument number off into register target.
//   In:       R12 = spec, R13 = base address of the argument values
//   Clobbers: AX, R11, flags
// Jumps to label prepared as soon as a negative descriptor is found.
// Anything that is not typeDouble is loaded as a 32 bit float.
#define LOADXMMREG(off, target) \
	MOVLQSX Spec_xmmargs+argument__size*off(R12), AX \
	TESTQ AX, AX \
	JS prepared \
	MOVWQZX AX, R11 \
	SHRL $16, AX \
	ADDQ R13, R11 \
	CMPB AX, $const_typeDouble \
	JNE 3(PC) \
	MOVSD 0(R11), target \ // float 64bit
	JMP 2(PC) \
	MOVSS 0(R11), target // float 32bit


// func asmcall()
//
// In:  DI = pointer to the spec.
// Register roles for the whole function:
//   R12 = spec, R13 = Spec_base, R14 = SP at entry (restored after the call).
// AX, BX, CX, DX, R11 and the argument registers are used as scratch.
// Order of work: push stack arguments (last one first), load integer
// registers, load xmm registers, call, store the results.
TEXT ·asmcall(SB),NOSPLIT,$0
	MOVQ DI, R12                // FRAME (preserved)
	MOVQ Spec_base(R12), R13    // base
	MOVQ SP, R14                // stack

	ANDQ $~0x1F, SP  // 32 byte alignment for cdecl (in case someone wants to pass __m256 on the stack)
	                 // for no __m256 16 byte would be ok
	                 // this is actually already done by cgocall - but asmcall was called from there and destroys that :(

	MOVQ Spec_stack+slice_len(R12), AX // number of stack arguments
	TESTQ AX, AX
	JZ reg

	// Fix alignment depending on number of arguments:
	// n pushes of 8 bytes follow, so pad by 8*((-n) mod 4) bytes to make
	// pad + 8*n a multiple of 32 and keep SP 32 byte aligned at the call.
	MOVQ AX, BX
	NEGQ BX
	ANDQ $3, BX
	SHLQ $3, BX
	SUBQ BX, SP

	MOVQ Spec_stack+slice_array(R12), BX

next:
	// AX = number of stack arguments still to push; walk the slice backwards
	// so the first stack argument ends up at the lowest address.
	DECQ AX
	// Only the low 32 bits of the descriptor are used, so load just those.
	MOVL 0(BX)(AX*argument__size), CX
	//check type and push to stack
	MOVWQZX CX, R11
	SHRL $16, CX
	ADDQ R13, R11

	// Each case below is 8 instructions long; JNE 7(PC) skips to the next case.
	CMPB CX, $const_type64
	JNE 7(PC)
	SUBQ $8, SP
	MOVQ 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeS32
	JNE 7(PC)
	SUBQ $8, SP
	MOVLQSX 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeU32
	JNE 7(PC)
	SUBQ $8, SP
	MOVLQZX 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeS16
	JNE 7(PC)
	SUBQ $8, SP
	MOVWQSX 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeU16
	JNE 7(PC)
	SUBQ $8, SP
	MOVWQZX 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeS8
	JNE 7(PC)
	SUBQ $8, SP
	MOVBQSX 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeU8
	JNE 7(PC)
	SUBQ $8, SP
	MOVBQZX 0(R11), CX
	MOVQ CX, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	CMPB CX, $const_typeDouble
	JNE 7(PC)
	SUBQ $8, SP
	MOVSD 0(R11), X0
	MOVSD X0, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

	// everything else is pushed as a 32 bit float
	SUBQ $8, SP
	MOVSS 0(R11), X0
	MOVSS X0, 0(SP)
	TESTQ AX, AX
	JZ reg
	JMP next

reg:
	// load register arguments
	LOADREG(0, DI)
	LOADREG(1, SI)
	LOADREG(2, DX)
	LOADREG(3, CX)
	LOADREG(4, R8)
	LOADREG(5, R9)

xmm:
	// load xmm arguments
	LOADXMMREG(0, X0)
	LOADXMMREG(1, X1)
	LOADXMMREG(2, X2)
	LOADXMMREG(3, X3)
	LOADXMMREG(4, X4)
	LOADXMMREG(5, X5)
	LOADXMMREG(6, X6)
	LOADXMMREG(7, X7)

prepared:
	// load number of vector registers
	MOVBQZX Spec_rax(R12), AX

	// do the actual call: indirect through the first word of the spec
	// (presumably the function pointer field - confirm against ffi.h)
	CALL (R12)

	MOVQ R14, SP

	// store ret0 (AX)
	// The store width is chosen from the numeric type value:
	// 0 -> 64 bit, 1..2 -> 32 bit, 3..4 -> 16 bit, anything else -> 8 bit.
	// NOTE(review): these presumably mirror const_type64 .. const_typeU8 - confirm against ffi.h.
	MOVLQSX Spec_ret0(R12), BX
	TESTQ BX, BX
	JS xmmret0
	MOVWQZX BX, R11
	SHRL $16, BX
	ADDQ R13, R11
	CMPB BX, $0
	JNE 3(PC)
	MOVQ AX, (R11)
	JMP ret1
	CMPB BX, $2
	JGT 3(PC)
	MOVL AX, (R11)
	JMP ret1
	CMPB BX, $4
	JGT 3(PC)
	MOVW AX, (R11)
	JMP ret1
	MOVB AX, (R11)

ret1:
	// store ret1 (DX), same width selection as ret0
	// NOTE(review): a negative ret1 skips the xmm results entirely - confirm this is
	// intended for functions returning an integer and a float part.
	MOVLQSX Spec_ret1(R12), BX
	TESTQ BX, BX
	JS DONE
	MOVWQZX BX, R11
	SHRL $16, BX
	ADDQ R13, R11
	CMPB BX, $0
	JNE 3(PC)
	MOVQ DX, (R11)
	JMP xmmret0     // continue like the 8 bit case, which falls through to xmmret0
	CMPB BX, $2
	JGT 3(PC)
	MOVL DX, (R11)
	JMP xmmret0
	CMPB BX, $4
	JGT 3(PC)
	MOVW DX, (R11)
	JMP xmmret0
	MOVB DX, (R11)

xmmret0:
	// store xmmret0 (X0): type value 7 is stored as double, everything else as float
	// NOTE(review): 7 is presumably const_typeDouble - confirm against ffi.h.
	MOVLQSX Spec_xmmret0(R12), BX
	TESTQ BX, BX
	JS DONE
	MOVWQZX BX, R11
	SHRL $16, BX
	ADDQ R13, R11
	CMPB BX, $7
	JNE 3(PC)
	MOVSD X0, (R11)
	JMP xmmret1
	MOVSS X0, (R11)

xmmret1:
	// store xmmret1 (X1)
	MOVLQSX Spec_xmmret1(R12), BX
	TESTQ BX, BX
	JS DONE
	MOVWQZX BX, R11
	SHRL $16, BX
	ADDQ R13, R11
	CMPB BX, $7
	JNE 3(PC)
	MOVSD X1, (R11)
	JMP DONE        // last result stored
	MOVSS X1, (R11)

DONE:
	RET


// 8 byte slots; each is called through below, so it is expected to hold the
// address of the named pthread function before x_cgo_init runs.
GLOBL pthread_attr_init__dynload(SB), NOPTR, $8
GLOBL pthread_attr_getstacksize__dynload(SB), NOPTR, $8
GLOBL pthread_attr_destroy__dynload(SB), NOPTR, $8

// x_cgo_init stores a lower stack bound into the g passed in DI.
//
// Frame: 0(SP) = size_t size, 8(SP).. = pthread_attr_t.
// R12 keeps g while the pthread functions are called.
TEXT x_cgo_init(SB),NOSPLIT,$512 // size_t size (8 byte) + unknown pthread_attr_t - hopefully this is big enough
	MOVQ DI, R12 // g

	// pthread_attr_init(8(SP))
	LEAQ 8(SP), DI
	MOVQ $pthread_attr_init__dynload(SB), R11
	CALL (R11)

	// pthread_attr_getstacksize(8(SP), 0(SP))
	LEAQ 8(SP), DI
	LEAQ 0(SP), SI
	MOVQ $pthread_attr_getstacksize__dynload(SB), R11
	CALL (R11)

	// g->stacklo = &size - size + 4096
	LEAQ 0x1000(SP), AX
	SUBQ 0(SP), AX
	MOVQ AX, (g_stack+stack_lo)(R12)

	// pthread_attr_destroy(8(SP))
	LEAQ 8(SP), DI
	MOVQ $pthread_attr_destroy__dynload(SB), R11
	CALL (R11)

	RET