// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "tls_arm64.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the Go runtime entry point: it sets up g0's stack bounds,
// calls _cgo_init if present, wires up m0<->g0, runs args/osinit/schedinit,
// queues runtime.main as the first goroutine, and starts this M.
// It never returns; the trailing store through nil is a deliberate crash.
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
	// SP = stack; R0 = argc; R1 = argv

	SUB	$32, RSP
	MOVW	R0, 8(RSP)	// argc
	MOVD	R1, 16(RSP)	// argv

#ifdef TLS_darwin
	// Initialize TLS.
	MOVD	ZR, g	// clear g, make sure it's not junk.
	SUB	$32, RSP
	MRS_TPIDR_R0
	AND	$~7, R0
	MOVD	R0, 16(RSP)	// arg2: TLS base
	MOVD	$runtime·tls_g(SB), R2
	MOVD	R2, 8(RSP)	// arg1: &tlsg
	BL	·tlsinit(SB)
	ADD	$32, RSP
#endif

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	RSP, R7
	MOVD	$(-64*1024)(R7), R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)
	MOVD	R0, (g_stack+stack_lo)(g)
	MOVD	R7, (g_stack+stack_hi)(g)

	// if there is a _cgo_init, call it using the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CBZ	R12, nocgo

#ifdef GOOS_android
	MRS_TPIDR_R0			// load TLS base pointer
	MOVD	R0, R3			// arg 3: TLS base pointer
	MOVD	$runtime·tls_g(SB), R2	// arg 2: &tls_g
#else
	MOVD	$0, R2			// arg 2: not used when using platform's TLS
#endif
	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
	MOVD	g, R0			// arg 0: G
	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
	BL	(R12)
	ADD	$16, RSP

nocgo:
	BL	runtime·save_g(SB)
	// update stackguard after _cgo_init
	MOVD	(g_stack+stack_lo)(g), R0
	ADD	$const_stackGuard, R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)

	// set the per-goroutine and per-mach "registers"
	MOVD	$runtime·m0(SB), R0

	// save m->g0 = g0
	MOVD	g, m_g0(R0)
	// save m0 to g0->m
	MOVD	R0, g_m(g)

	BL	runtime·check(SB)

#ifdef GOOS_windows
	BL	runtime·wintls(SB)
#endif

	MOVW	8(RSP), R0	// copy argc
	MOVW	R0, -8(RSP)
	MOVD	16(RSP), R0	// copy argv
	MOVD	R0, 0(RSP)
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVD	$runtime·mainPC(SB), R0		// entry
	SUB	$16, RSP
	MOVD	R0, 8(RSP)	// arg
	MOVD	$0, 0(RSP)	// dummy LR
	BL	runtime·newproc(SB)
	ADD	$16, RSP

	// start this M
	BL	runtime·mstart(SB)

	// Prevent dead-code elimination of debugCallV2, which is
	// intended to be called by debuggers.
	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0

	// mstart should never return; crash loudly if it does.
	MOVD	$0, R0
	MOVD	R0, (R0)	// boom
	UNDEF

// mainPC is a function value for runtime.main, read by rt0_go above.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// Windows ARM64 needs an immediate 0xf000 argument.
// See go.dev/issues/53837.
#define BREAK	\
#ifdef GOOS_windows	\
	BRK	$0xf000	\
#else	\
	BRK	\
#endif	\


TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
	BREAK
	RET

// asminit: nothing to initialize on arm64.
TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
	RET

TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	BL	runtime·mstart0(SB)
	RET	// not reached

/*
 * go-routine
 */

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	buf+0(FP), R5
	MOVD	gobuf_g(R5), R6
	MOVD	0(R6), R4	// make sure g != nil
	B	gogo<>(SB)

// gogo<> restores SP/BP/LR/ret/ctxt from the Gobuf in R5, zeroes the
// Gobuf fields (so the GC doesn't see stale pointers), and jumps to the
// saved PC on the new g (in R6).
TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
	MOVD	R6, g
	BL	runtime·save_g(SB)

	MOVD	gobuf_sp(R5), R0
	MOVD	R0, RSP
	MOVD	gobuf_bp(R5), R29
	MOVD	gobuf_lr(R5), LR
	MOVD	gobuf_ret(R5), R0
	MOVD	gobuf_ctxt(R5), R26
	MOVD	$0, gobuf_sp(R5)
	MOVD	$0, gobuf_bp(R5)
	MOVD	$0, gobuf_ret(R5)
	MOVD	$0, gobuf_lr(R5)
	MOVD	$0, gobuf_ctxt(R5)
	CMP	ZR, ZR	// set condition codes for == test, needed by stack split
	MOVD	gobuf_pc(R5), R6
	B	(R6)

// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	R0, R26				// context

	// Save caller state in g->sched
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)

	// Switch to m->g0 & its stack, call fn.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	CMP	g, R3
	BNE	2(PC)
	B	runtime·badmcall(SB)	// mcall called while already on g0

	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP	// sp = m->g0->sched.sp
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R3, R0				// arg = g
	MOVD	$0, -16(RSP)			// dummy LR
	SUB	$16, RSP
	MOVD	0(R26), R4			// code pointer
	BL	(R4)
	B	runtime·badmcall2(SB)	// fn returned; it must not

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	BL	(LR)	// make sure this function is not leaf
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R26		// context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_curg(R4), R6
	CMP	g, R6
	BEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R3
	BL	(R3)
	B	runtime·abort(SB)

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	BL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVD	R5, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	MOVD	R3, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29

	// call target function
	MOVD	0(R26), R3	// code pointer
	BL	(R3)

	// switch back to g
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	$0, (g_sched+gobuf_sp)(g)
	MOVD	$0, (g_sched+gobuf_bp)(g)
	RET

noswitch:
	// already on m stack, just call directly
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVD	0(R26), R3	// code pointer
	MOVD.P	16(RSP), R30	// restore LR
	SUB	$8, RSP, R29	// restore FP
	B	(R3)

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3 prolog's LR (R30)
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R4
	CMP	g, R4
	BNE	3(PC)
	BL	runtime·badmorestackg0(SB)
	B	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVD	m_gsignal(R8), R4
	CMP	g, R4
	BNE	3(PC)
	BL	runtime·badmorestackgsignal(SB)
	B	runtime·abort(SB)

	// Called from f.
	// Set g->sched to context in f
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	R3, (g_sched+gobuf_lr)(g)
	MOVD	R26, (g_sched+gobuf_ctxt)(g)

	// Called from f.
	// Set m->morebuf to f's callers.
	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
	MOVD	RSP, R0
	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
	MOVD	g, (m_morebuf+gobuf_g)(R8)

	// Call newstack on m->g0's stack.
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
	BL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	UNDEF

TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
	// Force SPWRITE. This function doesn't actually write SP,
	// but it is called with a special calling convention where
	// the caller doesn't save LR on stack but passes it as a
	// register (R3), and the unwinder currently doesn't understand.
	// Make it SPWRITE to stop unwinding. (See issue 54332)
	MOVD	RSP, RSP

	MOVW	$0, R26
	B	runtime·morestack(SB)

// spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	STP	(R0, R1), (0*8)(R20)
	STP	(R2, R3), (2*8)(R20)
	STP	(R4, R5), (4*8)(R20)
	STP	(R6, R7), (6*8)(R20)
	STP	(R8, R9), (8*8)(R20)
	STP	(R10, R11), (10*8)(R20)
	STP	(R12, R13), (12*8)(R20)
	STP	(R14, R15), (14*8)(R20)
	FSTPD	(F0, F1), (16*8)(R20)
	FSTPD	(F2, F3), (18*8)(R20)
	FSTPD	(F4, F5), (20*8)(R20)
	FSTPD	(F6, F7), (22*8)(R20)
	FSTPD	(F8, F9), (24*8)(R20)
	FSTPD	(F10, F11), (26*8)(R20)
	FSTPD	(F12, F13), (28*8)(R20)
	FSTPD	(F14, F15), (30*8)(R20)
	RET

// unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	LDP	(0*8)(R20), (R0, R1)
	LDP	(2*8)(R20), (R2, R3)
	LDP	(4*8)(R20), (R4, R5)
	LDP	(6*8)(R20), (R6, R7)
	LDP	(8*8)(R20), (R8, R9)
	LDP	(10*8)(R20), (R10, R11)
	LDP	(12*8)(R20), (R12, R13)
	LDP	(14*8)(R20), (R14, R15)
	FLDPD	(16*8)(R20), (F0, F1)
	FLDPD	(18*8)(R20), (F2, F3)
	FLDPD	(20*8)(R20), (F4, F5)
	FLDPD	(22*8)(R20), (F6, F7)
	FLDPD	(24*8)(R20), (F8, F9)
	FLDPD	(26*8)(R20), (F10, F11)
	FLDPD	(28*8)(R20), (F12, F13)
	FLDPD	(30*8)(R20), (F14, F15)
	RET

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH tail-jumps to NAME if the requested frame size (R16) fits in
// MAXSIZE; otherwise it falls through to the next DISPATCH line.
#define DISPATCH(NAME,MAXSIZE)		\
	MOVD	$MAXSIZE, R27;		\
	CMP	R27, R16;		\
	BGT	3(PC);			\
	MOVD	$NAME(SB), R27;		\
	B	(R27)
// Note: can't just "B NAME(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
	MOVWU	frameSize+32(FP), R16
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVD	$runtime·badreflectcall(SB), R0
	B	(R0)

// CALLFN defines one fixed-frame-size call* function: copy stack args in,
// load register args, call f, then spill registers and copy results back
// via callRet (which runs the write barriers in reflectcallmove).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVD	stackArgs+16(FP), R3;		\
	MOVWU	stackArgsSize+24(FP), R4;	\
	ADD	$8, RSP, R5;			\
	BIC	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* if R6=(argsize&~15) != 0 */		\
	ADD	R6, R5, R6;			\
	/* copy 16 bytes a time */		\
	LDP.P	16(R3), (R7, R8);		\
	STP.P	(R7, R8), 16(R5);		\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	AND	$0xf, R4, R6;			\
	CBZ	R6, 6(PC);			\
	/* if R6=(argsize&15) != 0 */		\
	ADD	R6, R5, R6;			\
	/* copy 1 byte a time for the rest */	\
	MOVBU.P	1(R3), R7;			\
	MOVBU.P	R7, 1(R5);			\
	CMP	R5, R6;				\
	BNE	-3(PC);				\
	/* set up argument registers */		\
	MOVD	regArgs+40(FP), R20;		\
	CALL	·unspillArgs(SB);		\
	/* call function */			\
	MOVD	f+8(FP), R26;			\
	MOVD	(R26), R20;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	BL	(R20);				\
	/* copy return values back */		\
	MOVD	regArgs+40(FP), R20;		\
	CALL	·spillArgs(SB);			\
	MOVD	stackArgsType+0(FP), R7;	\
	MOVD	stackArgs+16(FP), R3;		\
	MOVWU	stackArgsSize+24(FP), R4;	\
	MOVWU	stackRetOffset+28(FP), R6;	\
	ADD	$8, RSP, R5;			\
	ADD	R6, R5; 			\
	ADD	R6, R3;				\
	SUB	R6, R4;				\
	BL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $48-0
	NO_LOCAL_POINTERS
	STP	(R7, R3), 8(RSP)
	STP	(R5, R4), 24(RSP)
	MOVD	R20, 40(RSP)
	BL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// func memhash32(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	MOVD	$runtime·aeskeysched+0(SB), R3

	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.S[1]
	VMOV	R1, V0.S[0]

	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
noaes:
	B	runtime·memhash32Fallback<ABIInternal>(SB)

// func memhash64(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	MOVD	$runtime·aeskeysched+0(SB), R3

	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.D[1]
	VMOV	R1, V0.D[0]

	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
noaes:
	B	runtime·memhash64Fallback<ABIInternal>(SB)

// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	B	aeshashbody<>(SB)
noaes:
	B	runtime·memhashFallback<ABIInternal>(SB)

// func strhash(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	LDP	(R0), (R0, R2)	// string data / length
	B	aeshashbody<>(SB)
noaes:
	B	runtime·strhashFallback<ABIInternal>(SB)

// R0: data
// R1: seed data
// R2: length
// At return, R0 = return value
TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
	VEOR	V30.B16, V30.B16, V30.B16
	VMOV	R1, V30.D[0]
	VMOV	R2, V30.D[1]	// load length into seed

	MOVD	$runtime·aeskeysched+0(SB), R4
	VLD1.P	16(R4), [V0.B16]
	AESE	V30.B16, V0.B16
	AESMC	V0.B16, V0.B16
	// Dispatch on length; larger inputs use more seed/data lanes.
	CMP	$16, R2
	BLO	aes0to15
	BEQ	aes16
	CMP	$32, R2
	BLS	aes17to32
	CMP	$64, R2
	BLS	aes33to64
	CMP	$128, R2
	BLS	aes65to128
	B	aes129plus

aes0to15:
	CBZ	R2, aes0
	VEOR	V2.B16, V2.B16, V2.B16
	// Assemble up to 15 bytes into V2, testing one length bit at a time.
	TBZ	$3, R2, less_than_8
	VLD1.P	8(R0), V2.D[0]

less_than_8:
	TBZ	$2, R2, less_than_4
	VLD1.P	4(R0), V2.S[2]

less_than_4:
	TBZ	$1, R2, less_than_2
	VLD1.P	2(R0), V2.H[6]

less_than_2:
	TBZ	$0, R2, done
	VLD1	(R0), V2.B[14]
done:
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes0:
	VMOV	V0.D[0], R0
	RET

aes16:
	VLD1	(R0), [V2.B16]
	B	done

aes17to32:
	// make second seed
	VLD1	(R4), [V1.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	SUB	$16, R2, R10
	VLD1.P	(R0)(R10), [V2.B16]
	VLD1	(R0), [V3.B16]

	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V1.B16, V3.B16
	AESMC	V3.B16, V3.B16

	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V1.B16, V3.B16
	AESMC	V3.B16, V3.B16

	AESE	V0.B16, V2.B16
	AESE	V1.B16, V3.B16

	VEOR	V3.B16, V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes33to64:
	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	SUB	$32, R2, R10

	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
	VLD1	(R0), [V6.B16, V7.B16]

	AESE	V0.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V3.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V0.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V3.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V0.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESE	V3.B16, V7.B16

	VEOR	V6.B16, V4.B16, V4.B16
	VEOR	V7.B16, V5.B16, V5.B16
	VEOR	V5.B16, V4.B16, V4.B16

	VMOV	V4.D[0], R0
	RET

aes65to128:
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V30.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V30.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V30.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V30.B16, V7.B16
	AESMC	V7.B16, V7.B16

	SUB	$64, R2, R10
	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	AESE	V0.B16, V8.B16
	AESMC	V8.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESMC	V9.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESMC	V10.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESMC	V11.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESMC	V12.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESMC	V13.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESMC	V14.B16, V14.B16
	AESE	V7.B16, V15.B16
	AESMC	V15.B16, V15.B16

	AESE	V0.B16, V8.B16
	AESMC	V8.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESMC	V9.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESMC	V10.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESMC	V11.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESMC	V12.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESMC	V13.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESMC	V14.B16, V14.B16
	AESE	V7.B16, V15.B16
	AESMC	V15.B16, V15.B16

	AESE	V0.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESE	V7.B16, V15.B16

	VEOR	V12.B16, V8.B16, V8.B16
	VEOR	V13.B16, V9.B16, V9.B16
	VEOR	V14.B16, V10.B16, V10.B16
	VEOR	V15.B16, V11.B16, V11.B16
	VEOR	V10.B16, V8.B16, V8.B16
	VEOR	V11.B16, V9.B16, V9.B16
	VEOR	V9.B16, V8.B16, V8.B16

	VMOV	V8.D[0], R0
	RET

aes129plus:
	PRFM	(R0), PLDL1KEEP
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V30.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V30.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V30.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V30.B16, V7.B16
	AESMC	V7.B16, V7.B16
	// Start with the trailing 128 bytes, then loop over the rest
	// in 128-byte chunks.
	ADD	R0, R2, R10
	SUB	$128, R10, R10
	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
	SUB	$1, R2, R2
	LSR	$7, R2, R2

aesloop:
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16

	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16
	SUB	$1, R2, R2
	CBNZ	R2, aesloop

	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V8.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESE	V15.B16, V7.B16

	VEOR	V0.B16, V1.B16, V0.B16
	VEOR	V2.B16, V3.B16, V2.B16
	VEOR	V4.B16, V5.B16, V4.B16
	VEOR	V6.B16, V7.B16, V6.B16
	VEOR	V0.B16, V2.B16, V0.B16
	VEOR	V4.B16, V6.B16, V4.B16
	VEOR	V4.B16, V0.B16, V0.B16

	VMOV	V0.D[0], R0
	RET

TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVWU	cycles+0(FP), R0
again:
	YIELD
	SUBW	$1, R0
	CBNZ	R0, again
	RET

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
// Smashes R0.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	MOVD	$runtime·systemstack_switch(SB), R0
	ADD	$8, R0	// get past prologue
	MOVD	R0, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)
	MOVD	$0, (g_sched+gobuf_ret)(g)
	// Assert ctxt is zero. See func save.
	MOVD	(g_sched+gobuf_ctxt)(g), R0
	CBZ	R0, 2(PC)
	CALL	runtime·abort(SB)
	RET

// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0
	SUB	$16, RSP	// skip over saved frame pointer below RSP
	BL	(R1)
	ADD	$16, RSP	// skip over saved frame pointer below RSP
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0

	MOVD	RSP, R2		// save original stack pointer
	CBZ	g, nosave
	MOVD	g, R4

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	MOVD	g_m(g), R8
	MOVD	m_gsignal(R8), R3
	CMP	R3, g
	BEQ	nosave
	MOVD	m_g0(R8), R3
	CMP	R3, g
	BEQ	nosave

	// Switch to system stack.
	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
	BL	gosave_systemstack_switch<>(SB)
	MOVD	R3, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R9, R0

	// Now on a scheduling stack (a pthread-created stack).
	// Save room for two of our pointers /*, plus 32 bytes of callee
	// save area that lives on the caller stack. */
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	R4, 0(RSP)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R4), R4
	SUB	R2, R4
	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	BL	(R1)
	MOVD	R0, R9

	// Restore g, stack pointer. R0 is errno, so don't touch it
	MOVD	0(RSP), g
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	8(RSP), R6
	SUB	R6, R5
	MOVD	R9, R0
	MOVD	R5, RSP

	MOVW	R0, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	$0, R4
	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
	MOVD	R2, 8(RSP)	// Save original stack pointer.
	BL	(R1)
	// Restore stack pointer.
	MOVD	8(RSP), R2
	MOVD	R2, RSP
	MOVD	R0, ret+16(FP)
	RET

// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Load g from thread-local storage.
	BL	runtime·load_g(SB)

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CBZ	g, needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)
	B	havem

needm:
	MOVD	g, savedm-8(SP)	// g is zero, so is m.
	MOVD	$runtime·needm(SB), R0
	BL	(R0)

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)
	MOVD	R29, (g_sched+gobuf_bp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
	// Beware that the frame size is actually 32+16.
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4	// prepare stack as R4
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -48(R4)
	MOVD	(g_sched+gobuf_bp)(g), R5
	MOVD	R5, -56(R4)
	// Gather our arguments into registers.
	MOVD	fn+0(FP), R1
	MOVD	frame+8(FP), R2
	MOVD	ctxt+16(FP), R3
	MOVD	$-48(R4), R0	// maintain 16-byte SP alignment
	MOVD	R0, RSP	// switch stack
	MOVD	R1, 8(RSP)
	MOVD	R2, 16(RSP)
	MOVD	R3, 24(RSP)
	MOVD	$runtime·cgocallbackg(SB), R0
	CALL	(R0)	// indirect call to bypass nosplit check. We're on a different stack now.

	// Restore g->sched (== m->curg->sched) from saved values.
	MOVD	0(RSP), R5
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R4
	ADD	$48, R4, R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVD	savedm-8(SP), R6
	CBNZ	R6, droppedm
	MOVD	$runtime·dropm(SB), R0
	BL	(R0)
droppedm:

	// Done!
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$24
	// g (R28) and REGTMP (R27) might be clobbered by load_g. They
	// are callee-save in the gcc calling convention, so save them.
	MOVD	R27, savedR27-8(SP)
	MOVD	g, saveG-16(SP)

	BL	runtime·load_g(SB)
	MOVD	g_m(g), R0
	MOVD	m_curg(R0), R0
	MOVD	(g_stack+stack_hi)(R0), R0

	MOVD	saveG-16(SP), g
	// NOTE(review): R27 was saved above under the name savedR27-8(SP) but
	// is restored here as savedR28-8(SP). Both name the same -8(SP) slot,
	// so the restore is correct, but the local name is inconsistent —
	// confirm against upstream, which spells both sides savedR27.
	MOVD	savedR28-8(SP), R27
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g
	// This only happens if iscgo, so jump straight to save_g
	BL	runtime·save_g(SB)
	RET

// void setg_gcc(G*); set g called from gcc
TEXT setg_gcc<>(SB),NOSPLIT,$8
	MOVD	R0, g
	MOVD	R27, savedR27-8(SP)	// R27 is callee-save in the gcc ABI
	BL	runtime·save_g(SB)
	MOVD	savedR27-8(SP), R27
	RET

TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort crashes by loading through a nil pointer, then UNDEF as a backstop.
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	ZR, R0
	MOVD	(R0), R0
	UNDEF

TEXT runtime·return0(SB), NOSPLIT, $0
	MOVW	$0, R0
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
	MOVD	R0, R0	// NOP
	BL	runtime·goexit1(SB)	// does not return

// This is called from .init_array and follows the platform, not Go, ABI.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	SUB	$0x10, RSP
	MOVD	R27, 8(RSP)	// The access to global variables below implicitly uses R27, which is callee-save
	MOVD	runtime·lastmoduledatap(SB), R1
	MOVD	R0, moduledata_next(R1)
	MOVD	R0, runtime·lastmoduledatap(SB)
	MOVD	8(RSP), R27
	ADD	$0x10, RSP
	RET

TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET

// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R25, and returns a pointer
// to the buffer space in R25.
// It clobbers condition codes.
// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers)
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
	// Save the registers clobbered by the fast path.
	STP	(R0, R1), 184(RSP)
retry:
	// Load the current P's write-barrier buffer bounds.
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	(p_wbBuf+wbBuf_next)(R0), R1
	MOVD	(p_wbBuf+wbBuf_end)(R0), R27
	// Increment wbBuf.next position.
	ADD	R25, R1
	// Is the buffer full?
	CMP	R27, R1
	BHI	flush
	// Commit to the larger buffer.
	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
	// Make return value (the original next position)
	SUB	R25, R1, R25
	// Restore registers.
	LDP	184(RSP), (R0, R1)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// R0 and R1 already saved
	STP	(R2, R3), 1*8(RSP)
	STP	(R4, R5), 3*8(RSP)
	STP	(R6, R7), 5*8(RSP)
	STP	(R8, R9), 7*8(RSP)
	STP	(R10, R11), 9*8(RSP)
	STP	(R12, R13), 11*8(RSP)
	STP	(R14, R15), 13*8(RSP)
	// R16, R17 may be clobbered by linker trampoline
	// R18 is unused.
	STP	(R19, R20), 15*8(RSP)
	STP	(R21, R22), 17*8(RSP)
	STP	(R23, R24), 19*8(RSP)
	STP	(R25, R26), 21*8(RSP)
	// R27 is temp register.
	// R28 is g.
	// R29 is frame pointer (unused).
	// R30 is LR, which was saved by the prologue.
	// R31 is SP.

	CALL	runtime·wbBufFlush(SB)
	// Restore the registers saved above, then retry the reservation
	// against the now-emptied buffer.
	LDP	1*8(RSP), (R2, R3)
	LDP	3*8(RSP), (R4, R5)
	LDP	5*8(RSP), (R6, R7)
	LDP	7*8(RSP), (R8, R9)
	LDP	9*8(RSP), (R10, R11)
	LDP	11*8(RSP), (R12, R13)
	LDP	13*8(RSP), (R14, R15)
	LDP	15*8(RSP), (R19, R20)
	LDP	17*8(RSP), (R21, R22)
	LDP	19*8(RSP), (R23, R24)
	LDP	21*8(RSP), (R25, R26)
	JMP	retry

// gcWriteBarrierN entry points: request N pointer slots (8 bytes each)
// in R25 and tail-call the shared implementation above.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$8, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$16, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$24, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$32, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$40, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$48, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$56, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$64, R25
	JMP	gcWriteBarrier<>(SB)

DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below

// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 288 bytes free on the stack.
// 2. Set SP as SP-16.
// 3. Store the current LR in (SP) (using the SP after step 2).
// 4.
//    Store the current PC in the LR register.
// 5. Write the desired argument frame size at SP-16
// 6. Save all machine registers (including flags and fpsimd registers)
//    so they can be restored later by the debugger.
// 7. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R20 and
// invoking BRK to raise a breakpoint signal. Note that the signal PC of
// the signal triggered by the BRK instruction is the PC where the signal
// is trapped, not the next PC, so to resume execution, the debugger needs
// to set the signal PC to PC+4. See the comments in the implementation for
// the protocol the debugger is expected to follow. InjectDebugCall in the
// runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
	// Hand-rolled frame: save FP/LR and open 272 bytes below SP.
	STP	(R29, R30), -280(RSP)
	SUB	$272, RSP, RSP
	SUB	$8, RSP, R29
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	STP	(R27, g), (30*8)(RSP)
	STP	(R25, R26), (28*8)(RSP)
	STP	(R23, R24), (26*8)(RSP)
	STP	(R21, R22), (24*8)(RSP)
	STP	(R19, R20), (22*8)(RSP)
	STP	(R16, R17), (20*8)(RSP)
	STP	(R14, R15), (18*8)(RSP)
	STP	(R12, R13), (16*8)(RSP)
	STP	(R10, R11), (14*8)(RSP)
	STP	(R8, R9), (12*8)(RSP)
	STP	(R6, R7), (10*8)(RSP)
	STP	(R4, R5), (8*8)(RSP)
	STP	(R2, R3), (6*8)(RSP)
	STP	(R0, R1), (4*8)(RSP)

	// Perform a safe-point check.
	MOVD	R30, 8(RSP) // Caller's PC
	CALL	runtime·debugCallCheck(SB)
	MOVD	16(RSP), R0 // result: nonzero means the check failed
	CBZ	R0, good

	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVD	R0, 8(RSP)
	MOVD	24(RSP), R0
	MOVD	R0, 16(RSP)

	// Set R20 to 8 and invoke BRK. The debugger should get the
	// reason a call can't be injected from SP+8 and resume execution.
	MOVD	$8, R20
	BREAK
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R20 to 0 and
	// invoke BRK. The debugger should write the argument
	// frame for the call at SP+8, set up argument registers,
	// set the LR as the signal PC + 4, set the PC to the function
	// to call, set R26 to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R20 to 1 and invoke
	// BRK. The debugger can then inspect any return value saved
	// on the stack at SP+8 and in registers. To resume execution,
	// the debugger should restore the LR from (SP).
	//
	// If the function panics, this will set R20 to 2 and invoke BRK.
	// The interface{} value of the panic will be at SP+8. The debugger
	// can inspect the panic value and resume execution again.
	// Dispatch to the smallest debugCall* frame that fits R0 bytes
	// of argument frame; fall through if MAXSIZE is too small.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMP	$MAXSIZE, R0;			\
	BGT	5(PC);				\
	MOVD	$NAME(SB), R0;			\
	MOVD	R0, 8(RSP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVD	256(RSP), R0 // the argument frame size
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVD	$debugCallFrameTooLarge<>(SB), R0
	MOVD	R0, 8(RSP)
	MOVD	$20, R0
	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
	MOVD	$8, R20
	BREAK
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R20 to 16 and invoke BRK. The debugger should restore
	// all registers except for PC and RSP and resume execution.
	MOVD	$16, R20
	BREAK
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	LDP	(30*8)(RSP), (R27, g)
	LDP	(28*8)(RSP), (R25, R26)
	LDP	(26*8)(RSP), (R23, R24)
	LDP	(24*8)(RSP), (R21, R22)
	LDP	(22*8)(RSP), (R19, R20)
	LDP	(20*8)(RSP), (R16, R17)
	LDP	(18*8)(RSP), (R14, R15)
	LDP	(16*8)(RSP), (R12, R13)
	LDP	(14*8)(RSP), (R10, R11)
	LDP	(12*8)(RSP), (R8, R9)
	LDP	(10*8)(RSP), (R6, R7)
	LDP	(8*8)(RSP), (R4, R5)
	LDP	(6*8)(RSP), (R2, R3)
	LDP	(4*8)(RSP), (R0, R1)

	// Tear down the hand-rolled frame and return through the saved LR.
	LDP	-8(RSP), (R29, R27)
	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
	MOVD	-16(RSP), R30 // restore old lr
	JMP	(R27)

// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each instance stops (R20=0) for the debugger to set up the call,
// then stops again after it returns (R20=1).
#define DEBUG_CALL_FN(NAME,MAXSIZE)	\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;	\
	NO_LOCAL_POINTERS;		\
	MOVD	$0, R20;		\
	BREAK;				\
	MOVD	$1, R20;		\
	BREAK;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack at SP+8.
	MOVD	val_type+0(FP), R0
	MOVD	R0, 8(RSP)
	MOVD	val_data+8(FP), R0
	MOVD	R0, 16(RSP)
	// R20=2 tells the debugger a panic occurred; see debugCallV2 protocol.
	MOVD	$2, R20
	BREAK
	RET

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame.
// These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Defined as ABIInternal since the compiler generates ABIInternal
// calls to it directly and it does not use the stack-based Go ABI.
//
// Variants with MOVs shift the values the compiler passed in higher
// registers down into R0/R1, the first two ABIInternal argument
// registers expected by the goPanic* handlers; variants without MOVs
// already have them in R0/R1.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)

// getcallerfp returns the current value of the frame pointer
// register, R29 (see the register notes in gcWriteBarrier above).
TEXT ·getcallerfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVD	R29, R0
	RET