// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "tls_arm64.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime entry point: it sets up g0's stack bounds from the
// OS-provided stack, runs _cgo_init if present, wires m0<->g0, runs the
// runtime init sequence, queues runtime.main as the first goroutine, and
// starts this M. It never returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// SP = stack; R0 = argc; R1 = argv

	SUB	$32, RSP
	MOVW	R0, 8(RSP)	// argc
	MOVD	R1, 16(RSP)	// argv

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	RSP, R7
	MOVD	$(-64*1024)(R7), R0	// assume 64 KB of usable OS stack
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)
	MOVD	R0, (g_stack+stack_lo)(g)
	MOVD	R7, (g_stack+stack_hi)(g)

	// if there is a _cgo_init, call it using the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CMP	$0, R12
	BEQ	nocgo

	MRS_TPIDR_R0			// load TLS base pointer
	MOVD	R0, R3			// arg 3: TLS base pointer
#ifdef TLSG_IS_VARIABLE
	MOVD	$runtime·tls_g(SB), R2	// arg 2: &tls_g
#else
	MOVD	$0, R2			// arg 2: not used when using platform's TLS
#endif
	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
	MOVD	g, R0			// arg 0: G
	BL	(R12)
	// NOTE(review): this re-load and re-test of _cgo_init is redundant —
	// both outcomes fall through to nocgo. Upstream Go does not have it;
	// harmless but dead. Confirm before removing.
	MOVD	_cgo_init(SB), R12
	CMP	$0, R12
	BEQ	nocgo

nocgo:
	// update stackguard after _cgo_init
	MOVD	(g_stack+stack_lo)(g), R0
	ADD	$const__StackGuard, R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)

	// set the per-goroutine and per-mach "registers"
	MOVD	$runtime·m0(SB), R0

	// save m->g0 = g0
	MOVD	g, m_g0(R0)
	// save m0 to g0->m
	MOVD	R0, g_m(g)

	BL	runtime·check(SB)

	MOVW	8(RSP), R0	// copy argc
	MOVW	R0, -8(RSP)
	MOVD	16(RSP), R0	// copy argv
	MOVD	R0, 0(RSP)
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVD	$runtime·mainPC(SB), R0		// entry
	MOVD	RSP, R7
	// push (siz=0, fn=mainPC, two zero slots) as newproc's frame
	MOVD.W	$0, -8(R7)
	MOVD.W	R0, -8(R7)
	MOVD.W	$0, -8(R7)
	MOVD.W	$0, -8(R7)
	MOVD	R7, RSP
	BL	runtime·newproc(SB)
	ADD	$32, RSP

	// start this M
	BL	runtime·mstart(SB)

	// mstart should never return; crash loudly if it does.
	MOVD	$0, R0
	MOVD	R0, (R0)	// boom
	UNDEF

DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
	BRK
	RET

TEXT runtime·asminit(SB),NOSPLIT,$-8-0
	RET

/*
 * go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $-8-8
	MOVD	buf+0(FP), R3
	MOVD	RSP, R0
	MOVD	R0, gobuf_sp(R3)
	MOVD	LR, gobuf_pc(R3)
	MOVD	g, gobuf_g(R3)
	MOVD	ZR, gobuf_lr(R3)
	MOVD	ZR, gobuf_ret(R3)
	// Assert ctxt is zero. See func save.
	MOVD	gobuf_ctxt(R3), R0
	CMP	$0, R0
	BEQ	2(PC)
	CALL	runtime·badctxt(SB)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $24-8
	MOVD	buf+0(FP), R5
	MOVD	gobuf_g(R5), g
	BL	runtime·save_g(SB)

	MOVD	0(g), R4	// make sure g is not nil
	MOVD	gobuf_sp(R5), R0
	MOVD	R0, RSP
	MOVD	gobuf_lr(R5), LR
	MOVD	gobuf_ret(R5), R0
	MOVD	gobuf_ctxt(R5), R26
	// Clear the buf so the GC doesn't retain these pointers.
	MOVD	$0, gobuf_sp(R5)
	MOVD	$0, gobuf_ret(R5)
	MOVD	$0, gobuf_lr(R5)
	MOVD	$0, gobuf_ctxt(R5)
	CMP	ZR, ZR	// set condition codes for == test, needed by stack split
	MOVD	gobuf_pc(R5), R6
	B	(R6)

// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $-8-8
	// Save caller state in g->sched
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)
	MOVD	g, (g_sched+gobuf_g)(g)

	// Switch to m->g0 & its stack, call fn.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	CMP	g, R3
	BNE	2(PC)
	B	runtime·badmcall(SB)	// mcall called on m->g0 stack
	MOVD	fn+0(FP), R26		// context
	MOVD	0(R26), R4		// code pointer
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP	// sp = m->g0->sched.sp
	MOVD	R3, -8(RSP)	// arg: the old g
	MOVD	$0, -16(RSP)
	SUB	$16, RSP
	BL	(R4)
	B	runtime·badmcall2(SB)	// fn must not return

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	BL	(LR)	// make sure this function is not leaf
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R26		// context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_curg(R4), R6
	CMP	g, R6
	BEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R3
	BL	(R3)

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVD	$runtime·systemstack_switch(SB), R6
	ADD	$8, R6	// get past prologue
	MOVD	R6, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)
	MOVD	g, (g_sched+gobuf_g)(g)

	// switch to g0
	MOVD	R5, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	// make it look like mstart called systemstack on g0, to stop traceback
	SUB	$16, R3
	AND	$~15, R3	// keep RSP 16-byte aligned
	MOVD	$runtime·mstart(SB), R4
	MOVD	R4, 0(R3)
	MOVD	R3, RSP

	// call target function
	MOVD	0(R26), R3	// code pointer
	BL	(R3)

	// switch back to g
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	$0, (g_sched+gobuf_sp)(g)
	RET

noswitch:
	// already on m stack, just call directly
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVD	0(R26), R3	// code pointer
	MOVD.P	16(RSP), R30	// restore LR
	B	(R3)

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3 prolog's LR (R30)
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$-8-0
	// Cannot grow scheduler stack (m->g0).
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R4
	CMP	g, R4
	BNE	3(PC)
	BL	runtime·badmorestackg0(SB)
	B	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVD	m_gsignal(R8), R4
	CMP	g, R4
	BNE	3(PC)
	BL	runtime·badmorestackgsignal(SB)
	B	runtime·abort(SB)

	// Called from f.
	// Set g->sched to context in f
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	R3, (g_sched+gobuf_lr)(g)
	MOVD	R26, (g_sched+gobuf_ctxt)(g)

	// Called from f.
	// Set m->morebuf to f's callers.
	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
	MOVD	RSP, R0
	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
	MOVD	g, (m_morebuf+gobuf_g)(R8)

	// Call newstack on m->g0's stack.
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
	BL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	UNDEF

TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-4-0
	MOVW	$0, R26	// clear the context register before tailing into morestack
	B	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the requested argsize (R16) fits in MAXSIZE.
#define DISPATCH(NAME,MAXSIZE)		\
	MOVD	$MAXSIZE, R27;		\
	CMP	R27, R16;		\
	BGT	3(PC);			\
	MOVD	$NAME(SB), R27;		\
	B	(R27)
// Note: can't just "B NAME(SB)" - bad inlining results.
TEXT reflect·call(SB), NOSPLIT, $0-0
	B	·reflectcall(SB)

// reflectcall dispatches to the call* routine whose fixed frame is just
// large enough for argsize bytes of arguments.
TEXT ·reflectcall(SB), NOSPLIT, $-8-32
	MOVWU	argsize+24(FP), R16
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVD	$runtime·badreflectcall(SB), R0
	B	(R0)

// CALLFN defines one fixed-frame call* routine: it copies the caller's
// argument block onto the local frame, calls f, then hands off to
// callRet<> to copy results (from retoffset onward) back out.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVD	arg+16(FP), R3;			\
	MOVWU	argsize+24(FP), R4;		\
	ADD	$8, RSP, R5;			\
	BIC	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* if R6=(argsize&~15) != 0 */		\
	ADD	R6, R5, R6;			\
	/* copy 16 bytes a time */		\
	LDP.P	16(R3), (R7, R8);		\
	STP.P	(R7, R8), 16(R5);		\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	AND	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* if R6=(argsize&15) != 0 */		\
	ADD	R6, R5, R6;			\
	/* copy 1 byte a time for the rest */	\
	MOVBU.P	1(R3), R7;			\
	MOVBU.P	R7, 1(R5);			\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	/* call function */			\
	MOVD	f+8(FP), R26;			\
	MOVD	(R26), R0;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	BL	(R0);				\
	/* copy return values back */		\
	MOVD	argtype+0(FP), R7;		\
	MOVD	arg+16(FP), R3;			\
	MOVWU	n+24(FP), R4;			\
	MOVWU	retoffset+28(FP), R6;		\
	ADD	$8, RSP, R5;			\
	ADD	R6, R5;				\
	ADD	R6, R3;				\
	SUB	R6, R4;				\
	BL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $40-0
	MOVD	R7, 8(RSP)	// argtype
	MOVD	R3, 16(RSP)	// dst
	MOVD	R5, 24(RSP)	// src
	MOVD	R4, 32(RSP)	// size
	BL	runtime·reflectcallmove(SB)
	RET

// These have 8 added to make the overall frame size a multiple of 16,
// as required by the ABI. (There is another +8 for the saved LR.)
CALLFN(·call32, 40)
CALLFN(·call64, 72)
CALLFN(·call128, 136)
CALLFN(·call256, 264)
CALLFN(·call512, 520)
CALLFN(·call1024, 1032)
CALLFN(·call2048, 2056)
CALLFN(·call4096, 4104)
CALLFN(·call8192, 8200)
CALLFN(·call16384, 16392)
CALLFN(·call32768, 32776)
CALLFN(·call65536, 65544)
CALLFN(·call131072, 131080)
CALLFN(·call262144, 262152)
CALLFN(·call524288, 524296)
CALLFN(·call1048576, 1048584)
CALLFN(·call2097152, 2097160)
CALLFN(·call4194304, 4194312)
CALLFN(·call8388608, 8388616)
CALLFN(·call16777216, 16777224)
CALLFN(·call33554432, 33554440)
CALLFN(·call67108864, 67108872)
CALLFN(·call134217728, 134217736)
CALLFN(·call268435456, 268435464)
CALLFN(·call536870912, 536870920)
CALLFN(·call1073741824, 1073741832)

// AES hashing not implemented for ARM64, issue #10109.
439 TEXT runtime·aeshash(SB),NOSPLIT,$-8-0 440 MOVW $0, R0 441 MOVW (R0), R1 442 TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0 443 MOVW $0, R0 444 MOVW (R0), R1 445 TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0 446 MOVW $0, R0 447 MOVW (R0), R1 448 TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0 449 MOVW $0, R0 450 MOVW (R0), R1 451 452 TEXT runtime·procyield(SB),NOSPLIT,$0-0 453 MOVWU cycles+0(FP), R0 454 again: 455 YIELD 456 SUBW $1, R0 457 CBNZ R0, again 458 RET 459 460 // void jmpdefer(fv, sp); 461 // called from deferreturn. 462 // 1. grab stored LR for caller 463 // 2. sub 4 bytes to get back to BL deferreturn 464 // 3. BR to fn 465 TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16 466 MOVD 0(RSP), R0 467 SUB $4, R0 468 MOVD R0, LR 469 470 MOVD fv+0(FP), R26 471 MOVD argp+8(FP), R0 472 MOVD R0, RSP 473 SUB $8, RSP 474 MOVD 0(R26), R3 475 B (R3) 476 477 // Save state of caller into g->sched. Smashes R0. 478 TEXT gosave<>(SB),NOSPLIT,$-8 479 MOVD LR, (g_sched+gobuf_pc)(g) 480 MOVD RSP, R0 481 MOVD R0, (g_sched+gobuf_sp)(g) 482 MOVD $0, (g_sched+gobuf_lr)(g) 483 MOVD $0, (g_sched+gobuf_ret)(g) 484 // Assert ctxt is zero. See func save. 485 MOVD (g_sched+gobuf_ctxt)(g), R0 486 CMP $0, R0 487 BEQ 2(PC) 488 CALL runtime·badctxt(SB) 489 RET 490 491 // func asmcgocall(fn, arg unsafe.Pointer) int32 492 // Call fn(arg) on the scheduler stack, 493 // aligned appropriately for the gcc ABI. 494 // See cgocall.go for more details. 495 TEXT ·asmcgocall(SB),NOSPLIT,$0-20 496 MOVD fn+0(FP), R1 497 MOVD arg+8(FP), R0 498 499 MOVD RSP, R2 // save original stack pointer 500 MOVD g, R4 501 502 // Figure out if we need to switch to m->g0 stack. 503 // We get called to create new OS threads too, and those 504 // come in on the m->g0 stack already. 
505 MOVD g_m(g), R8 506 MOVD m_g0(R8), R3 507 CMP R3, g 508 BEQ g0 509 MOVD R0, R9 // gosave<> and save_g might clobber R0 510 BL gosave<>(SB) 511 MOVD R3, g 512 BL runtime·save_g(SB) 513 MOVD (g_sched+gobuf_sp)(g), R0 514 MOVD R0, RSP 515 MOVD R9, R0 516 517 // Now on a scheduling stack (a pthread-created stack). 518 g0: 519 // Save room for two of our pointers /*, plus 32 bytes of callee 520 // save area that lives on the caller stack. */ 521 MOVD RSP, R13 522 SUB $16, R13 523 MOVD R13, RSP 524 MOVD R4, 0(RSP) // save old g on stack 525 MOVD (g_stack+stack_hi)(R4), R4 526 SUB R2, R4 527 MOVD R4, 8(RSP) // save depth in old g stack (can't just save SP, as stack might be copied during a callback) 528 BL (R1) 529 MOVD R0, R9 530 531 // Restore g, stack pointer. R0 is errno, so don't touch it 532 MOVD 0(RSP), g 533 BL runtime·save_g(SB) 534 MOVD (g_stack+stack_hi)(g), R5 535 MOVD 8(RSP), R6 536 SUB R6, R5 537 MOVD R9, R0 538 MOVD R5, RSP 539 540 MOVW R0, ret+16(FP) 541 RET 542 543 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) 544 // Turn the fn into a Go func (by taking its address) and call 545 // cgocallback_gofunc. 546 TEXT runtime·cgocallback(SB),NOSPLIT,$40-32 547 MOVD $fn+0(FP), R0 548 MOVD R0, 8(RSP) 549 MOVD frame+8(FP), R0 550 MOVD R0, 16(RSP) 551 MOVD framesize+16(FP), R0 552 MOVD R0, 24(RSP) 553 MOVD ctxt+24(FP), R0 554 MOVD R0, 32(RSP) 555 MOVD $runtime·cgocallback_gofunc(SB), R0 556 BL (R0) 557 RET 558 559 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) 560 // See cgocall.go for more details. 561 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32 562 NO_LOCAL_POINTERS 563 564 // Load g from thread-local storage. 565 MOVB runtime·iscgo(SB), R3 566 CMP $0, R3 567 BEQ nocgo 568 BL runtime·load_g(SB) 569 nocgo: 570 571 // If g is nil, Go did not create the current thread. 572 // Call needm to obtain one for temporary use. 
573 // In this case, we're running on the thread stack, so there's 574 // lots of space, but the linker doesn't know. Hide the call from 575 // the linker analysis by using an indirect call. 576 CMP $0, g 577 BEQ needm 578 579 MOVD g_m(g), R8 580 MOVD R8, savedm-8(SP) 581 B havem 582 583 needm: 584 MOVD g, savedm-8(SP) // g is zero, so is m. 585 MOVD $runtime·needm(SB), R0 586 BL (R0) 587 588 // Set m->sched.sp = SP, so that if a panic happens 589 // during the function we are about to execute, it will 590 // have a valid SP to run on the g0 stack. 591 // The next few lines (after the havem label) 592 // will save this SP onto the stack and then write 593 // the same SP back to m->sched.sp. That seems redundant, 594 // but if an unrecovered panic happens, unwindm will 595 // restore the g->sched.sp from the stack location 596 // and then systemstack will try to use it. If we don't set it here, 597 // that restored SP will be uninitialized (typically 0) and 598 // will not be usable. 599 MOVD g_m(g), R8 600 MOVD m_g0(R8), R3 601 MOVD RSP, R0 602 MOVD R0, (g_sched+gobuf_sp)(R3) 603 604 havem: 605 // Now there's a valid m, and we're running on its m->g0. 606 // Save current m->g0->sched.sp on stack and then set it to SP. 607 // Save current sp in m->g0->sched.sp in preparation for 608 // switch back to m->curg stack. 609 // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP). 610 // Beware that the frame size is actually 32. 611 MOVD m_g0(R8), R3 612 MOVD (g_sched+gobuf_sp)(R3), R4 613 MOVD R4, savedsp-16(SP) 614 MOVD RSP, R0 615 MOVD R0, (g_sched+gobuf_sp)(R3) 616 617 // Switch to m->curg stack and call runtime.cgocallbackg. 618 // Because we are taking over the execution of m->curg 619 // but *not* resuming what had been running, we need to 620 // save that information (m->curg->sched) so we can restore it. 621 // We can restore m->curg->sched.sp easily, because calling 622 // runtime.cgocallbackg leaves SP unchanged upon return. 
623 // To save m->curg->sched.pc, we push it onto the stack. 624 // This has the added benefit that it looks to the traceback 625 // routine like cgocallbackg is going to return to that 626 // PC (because the frame we allocate below has the same 627 // size as cgocallback_gofunc's frame declared above) 628 // so that the traceback will seamlessly trace back into 629 // the earlier calls. 630 // 631 // In the new goroutine, -8(SP) is unused (where SP refers to 632 // m->curg's SP while we're setting it up, before we've adjusted it). 633 MOVD m_curg(R8), g 634 BL runtime·save_g(SB) 635 MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 636 MOVD (g_sched+gobuf_pc)(g), R5 637 MOVD R5, -(24+8)(R4) 638 MOVD ctxt+24(FP), R0 639 MOVD R0, -(16+8)(R4) 640 MOVD $-(24+8)(R4), R0 // maintain 16-byte SP alignment 641 MOVD R0, RSP 642 BL runtime·cgocallbackg(SB) 643 644 // Restore g->sched (== m->curg->sched) from saved values. 645 MOVD 0(RSP), R5 646 MOVD R5, (g_sched+gobuf_pc)(g) 647 MOVD RSP, R4 648 ADD $(24+8), R4, R4 649 MOVD R4, (g_sched+gobuf_sp)(g) 650 651 // Switch back to m->g0's stack and restore m->g0->sched.sp. 652 // (Unlike m->curg, the g0 goroutine never uses sched.pc, 653 // so we do not have to restore it.) 654 MOVD g_m(g), R8 655 MOVD m_g0(R8), g 656 BL runtime·save_g(SB) 657 MOVD (g_sched+gobuf_sp)(g), R0 658 MOVD R0, RSP 659 MOVD savedsp-16(SP), R4 660 MOVD R4, (g_sched+gobuf_sp)(g) 661 662 // If the m on entry was nil, we called needm above to borrow an m 663 // for the duration of the call. Since the call is over, return it with dropm. 664 MOVD savedm-8(SP), R6 665 CMP $0, R6 666 BNE droppedm 667 MOVD $runtime·dropm(SB), R0 668 BL (R0) 669 droppedm: 670 671 // Done! 672 RET 673 674 // Called from cgo wrappers, this function returns g->m->curg.stack.hi. 675 // Must obey the gcc calling convention. 676 TEXT _cgo_topofstack(SB),NOSPLIT,$24 677 // g (R28) and REGTMP (R27) might be clobbered by load_g. 
They 678 // are callee-save in the gcc calling convention, so save them. 679 MOVD R27, savedR27-8(SP) 680 MOVD g, saveG-16(SP) 681 682 BL runtime·load_g(SB) 683 MOVD g_m(g), R0 684 MOVD m_curg(R0), R0 685 MOVD (g_stack+stack_hi)(R0), R0 686 687 MOVD saveG-16(SP), g 688 MOVD savedR28-8(SP), R27 689 RET 690 691 // void setg(G*); set g. for use by needm. 692 TEXT runtime·setg(SB), NOSPLIT, $0-8 693 MOVD gg+0(FP), g 694 // This only happens if iscgo, so jump straight to save_g 695 BL runtime·save_g(SB) 696 RET 697 698 // void setg_gcc(G*); set g called from gcc 699 TEXT setg_gcc<>(SB),NOSPLIT,$8 700 MOVD R0, g 701 MOVD R27, savedR27-8(SP) 702 BL runtime·save_g(SB) 703 MOVD savedR27-8(SP), R27 704 RET 705 706 TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8 707 MOVD 0(RSP), R0 // LR saved by caller 708 MOVD R0, ret+0(FP) 709 RET 710 711 TEXT runtime·abort(SB),NOSPLIT,$-8-0 712 B (ZR) 713 UNDEF 714 715 // memequal(a, b unsafe.Pointer, size uintptr) bool 716 TEXT runtime·memequal(SB),NOSPLIT,$-8-25 717 MOVD size+16(FP), R1 718 // short path to handle 0-byte case 719 CBZ R1, equal 720 MOVD a+0(FP), R0 721 MOVD b+8(FP), R2 722 MOVD $ret+24(FP), R8 723 B runtime·memeqbody<>(SB) 724 equal: 725 MOVD $1, R0 726 MOVB R0, ret+24(FP) 727 RET 728 729 // memequal_varlen(a, b unsafe.Pointer) bool 730 TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 731 MOVD a+0(FP), R3 732 MOVD b+8(FP), R4 733 CMP R3, R4 734 BEQ eq 735 MOVD 8(R26), R5 // compiler stores size at offset 8 in the closure 736 MOVD R3, 8(RSP) 737 MOVD R4, 16(RSP) 738 MOVD R5, 24(RSP) 739 BL runtime·memequal(SB) 740 MOVBU 32(RSP), R3 741 MOVB R3, ret+16(FP) 742 RET 743 eq: 744 MOVD $1, R3 745 MOVB R3, ret+16(FP) 746 RET 747 748 TEXT runtime·cmpstring(SB),NOSPLIT,$-4-40 749 MOVD s1_base+0(FP), R2 750 MOVD s1_len+8(FP), R0 751 MOVD s2_base+16(FP), R3 752 MOVD s2_len+24(FP), R1 753 ADD $40, RSP, R7 754 B runtime·cmpbody<>(SB) 755 756 TEXT bytes·Compare(SB),NOSPLIT,$-4-56 757 MOVD s1+0(FP), R2 758 MOVD s1+8(FP), R0 759 MOVD 
s2+24(FP), R3 760 MOVD s2+32(FP), R1 761 ADD $56, RSP, R7 762 B runtime·cmpbody<>(SB) 763 764 // On entry: 765 // R0 is the length of s1 766 // R1 is the length of s2 767 // R2 points to the start of s1 768 // R3 points to the start of s2 769 // R7 points to return value (-1/0/1 will be written here) 770 // 771 // On exit: 772 // R4, R5, and R6 are clobbered 773 TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0 774 CMP R2, R3 775 BEQ samebytes // same starting pointers; compare lengths 776 CMP R0, R1 777 CSEL LT, R1, R0, R6 // R6 is min(R0, R1) 778 779 ADD R2, R6 // R2 is current byte in s1, R6 is last byte in s1 to compare 780 loop: 781 CMP R2, R6 782 BEQ samebytes // all compared bytes were the same; compare lengths 783 MOVBU.P 1(R2), R4 784 MOVBU.P 1(R3), R5 785 CMP R4, R5 786 BEQ loop 787 // bytes differed 788 MOVD $1, R4 789 CSNEG LT, R4, R4, R4 790 MOVD R4, (R7) 791 RET 792 samebytes: 793 MOVD $1, R4 794 CMP R0, R1 795 CSNEG LT, R4, R4, R4 796 CSEL EQ, ZR, R4, R4 797 MOVD R4, (R7) 798 RET 799 800 // 801 // functions for other packages 802 // 803 TEXT bytes·IndexByte(SB),NOSPLIT,$0-40 804 MOVD b+0(FP), R0 805 MOVD b_len+8(FP), R2 806 MOVBU c+24(FP), R1 807 MOVD $ret+32(FP), R8 808 B runtime·indexbytebody<>(SB) 809 810 TEXT strings·IndexByte(SB),NOSPLIT,$0-32 811 MOVD s+0(FP), R0 812 MOVD s_len+8(FP), R2 813 MOVBU c+16(FP), R1 814 MOVD $ret+24(FP), R8 815 B runtime·indexbytebody<>(SB) 816 817 // input: 818 // R0: data 819 // R1: byte to search 820 // R2: data len 821 // R8: address to put result 822 TEXT runtime·indexbytebody<>(SB),NOSPLIT,$0 823 // Core algorithm: 824 // For each 32-byte chunk we calculate a 64-bit syndrome value, 825 // with two bits per byte. For each tuple, bit 0 is set if the 826 // relevant byte matched the requested character and bit 1 is 827 // not used (faster than using a 32bit syndrome). 
Since the bits 828 // in the syndrome reflect exactly the order in which things occur 829 // in the original string, counting trailing zeros allows to 830 // identify exactly which byte has matched. 831 832 CBZ R2, fail 833 MOVD R0, R11 834 // Magic constant 0x40100401 allows us to identify 835 // which lane matches the requested byte. 836 // 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24)) 837 // Different bytes have different bit masks (i.e: 1, 4, 16, 64) 838 MOVD $0x40100401, R5 839 VMOV R1, V0.B16 840 // Work with aligned 32-byte chunks 841 BIC $0x1f, R0, R3 842 VMOV R5, V5.S4 843 ANDS $0x1f, R0, R9 844 AND $0x1f, R2, R10 845 BEQ loop 846 847 // Input string is not 32-byte aligned. We calculate the 848 // syndrome value for the aligned 32 bytes block containing 849 // the first bytes and mask off the irrelevant part. 850 VLD1.P (R3), [V1.B16, V2.B16] 851 SUB $0x20, R9, R4 852 ADDS R4, R2, R2 853 VCMEQ V0.B16, V1.B16, V3.B16 854 VCMEQ V0.B16, V2.B16, V4.B16 855 VAND V5.B16, V3.B16, V3.B16 856 VAND V5.B16, V4.B16, V4.B16 857 VADDP V4.B16, V3.B16, V6.B16 // 256->128 858 VADDP V6.B16, V6.B16, V6.B16 // 128->64 859 VMOV V6.D[0], R6 860 // Clear the irrelevant lower bits 861 LSL $1, R9, R4 862 LSR R4, R6, R6 863 LSL R4, R6, R6 864 // The first block can also be the last 865 BLS masklast 866 // Have we found something already? 
867 CBNZ R6, tail 868 869 loop: 870 VLD1.P (R3), [V1.B16, V2.B16] 871 SUBS $0x20, R2, R2 872 VCMEQ V0.B16, V1.B16, V3.B16 873 VCMEQ V0.B16, V2.B16, V4.B16 874 // If we're out of data we finish regardless of the result 875 BLS end 876 // Use a fast check for the termination condition 877 VORR V4.B16, V3.B16, V6.B16 878 VADDP V6.D2, V6.D2, V6.D2 879 VMOV V6.D[0], R6 880 // We're not out of data, loop if we haven't found the character 881 CBZ R6, loop 882 883 end: 884 // Termination condition found, let's calculate the syndrome value 885 VAND V5.B16, V3.B16, V3.B16 886 VAND V5.B16, V4.B16, V4.B16 887 VADDP V4.B16, V3.B16, V6.B16 888 VADDP V6.B16, V6.B16, V6.B16 889 VMOV V6.D[0], R6 890 // Only do the clear for the last possible block with less than 32 bytes 891 // Condition flags come from SUBS in the loop 892 BHS tail 893 894 masklast: 895 // Clear the irrelevant upper bits 896 ADD R9, R10, R4 897 AND $0x1f, R4, R4 898 SUB $0x20, R4, R4 899 NEG R4<<1, R4 900 LSL R4, R6, R6 901 LSR R4, R6, R6 902 903 tail: 904 // Check that we have found a character 905 CBZ R6, fail 906 // Count the trailing zeros using bit reversing 907 RBIT R6, R6 908 // Compensate the last post-increment 909 SUB $0x20, R3, R3 910 // And count the leading zeros 911 CLZ R6, R6 912 // R6 is twice the offset into the fragment 913 ADD R6>>1, R3, R0 914 // Compute the offset result 915 SUB R11, R0, R0 916 MOVD R0, (R8) 917 RET 918 919 fail: 920 MOVD $-1, R0 921 MOVD R0, (R8) 922 RET 923 924 // Equal(a, b []byte) bool 925 TEXT bytes·Equal(SB),NOSPLIT,$0-49 926 MOVD a_len+8(FP), R1 927 MOVD b_len+32(FP), R3 928 CMP R1, R3 929 // unequal lengths are not equal 930 BNE not_equal 931 // short path to handle 0-byte case 932 CBZ R1, equal 933 MOVD a+0(FP), R0 934 MOVD b+24(FP), R2 935 MOVD $ret+48(FP), R8 936 B runtime·memeqbody<>(SB) 937 equal: 938 MOVD $1, R0 939 MOVB R0, ret+48(FP) 940 RET 941 not_equal: 942 MOVB ZR, ret+48(FP) 943 RET 944 945 // input: 946 // R0: pointer a 947 // R1: data len 948 // R2: 
pointer b 949 // R8: address to put result 950 TEXT runtime·memeqbody<>(SB),NOSPLIT,$0 951 CMP $1, R1 952 // handle 1-byte special case for better performance 953 BEQ one 954 CMP $16, R1 955 // handle specially if length < 16 956 BLO tail 957 BIC $0x3f, R1, R3 958 CBZ R3, chunk16 959 // work with 64-byte chunks 960 ADD R3, R0, R6 // end of chunks 961 chunk64_loop: 962 VLD1.P (R0), [V0.D2, V1.D2, V2.D2, V3.D2] 963 VLD1.P (R2), [V4.D2, V5.D2, V6.D2, V7.D2] 964 VCMEQ V0.D2, V4.D2, V8.D2 965 VCMEQ V1.D2, V5.D2, V9.D2 966 VCMEQ V2.D2, V6.D2, V10.D2 967 VCMEQ V3.D2, V7.D2, V11.D2 968 VAND V8.B16, V9.B16, V8.B16 969 VAND V8.B16, V10.B16, V8.B16 970 VAND V8.B16, V11.B16, V8.B16 971 CMP R0, R6 972 VMOV V8.D[0], R4 973 VMOV V8.D[1], R5 974 CBZ R4, not_equal 975 CBZ R5, not_equal 976 BNE chunk64_loop 977 AND $0x3f, R1, R1 978 CBZ R1, equal 979 chunk16: 980 // work with 16-byte chunks 981 BIC $0xf, R1, R3 982 CBZ R3, tail 983 ADD R3, R0, R6 // end of chunks 984 chunk16_loop: 985 VLD1.P (R0), [V0.D2] 986 VLD1.P (R2), [V1.D2] 987 VCMEQ V0.D2, V1.D2, V2.D2 988 CMP R0, R6 989 VMOV V2.D[0], R4 990 VMOV V2.D[1], R5 991 CBZ R4, not_equal 992 CBZ R5, not_equal 993 BNE chunk16_loop 994 AND $0xf, R1, R1 995 CBZ R1, equal 996 tail: 997 // special compare of tail with length < 16 998 TBZ $3, R1, lt_8 999 MOVD.P 8(R0), R4 1000 MOVD.P 8(R2), R5 1001 CMP R4, R5 1002 BNE not_equal 1003 lt_8: 1004 TBZ $2, R1, lt_4 1005 MOVWU.P 4(R0), R4 1006 MOVWU.P 4(R2), R5 1007 CMP R4, R5 1008 BNE not_equal 1009 lt_4: 1010 TBZ $1, R1, lt_2 1011 MOVHU.P 2(R0), R4 1012 MOVHU.P 2(R2), R5 1013 CMP R4, R5 1014 BNE not_equal 1015 lt_2: 1016 TBZ $0, R1, equal 1017 one: 1018 MOVBU (R0), R4 1019 MOVBU (R2), R5 1020 CMP R4, R5 1021 BNE not_equal 1022 equal: 1023 MOVD $1, R0 1024 MOVB R0, (R8) 1025 RET 1026 not_equal: 1027 MOVB ZR, (R8) 1028 RET 1029 1030 TEXT runtime·return0(SB), NOSPLIT, $0 1031 MOVW $0, R0 1032 RET 1033 1034 // The top-most function running on a goroutine 1035 // returns to goexit+PCQuantum. 
1036 TEXT runtime·goexit(SB),NOSPLIT,$-8-0 1037 MOVD R0, R0 // NOP 1038 BL runtime·goexit1(SB) // does not return 1039 1040 TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 1041 RET 1042 1043 // This is called from .init_array and follows the platform, not Go, ABI. 1044 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 1045 SUB $0x10, RSP 1046 MOVD R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save 1047 MOVD runtime·lastmoduledatap(SB), R1 1048 MOVD R0, moduledata_next(R1) 1049 MOVD R0, runtime·lastmoduledatap(SB) 1050 MOVD 8(RSP), R27 1051 ADD $0x10, RSP 1052 RET 1053 1054 TEXT ·checkASM(SB),NOSPLIT,$0-1 1055 MOVW $1, R3 1056 MOVB R3, ret+0(FP) 1057 RET