// Source: github.com/comwrg/go/src@v0.0.0-20220319063731-c238d0440370/runtime/asm_amd64.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)

// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)

// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT,$0
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()

	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Create a new thread to finish Go runtime initialization.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JZ	nocgo

	// We're calling back to C.
	// Align stack per ELF ABI requirements.
	MOVQ	SP, BX  // Callee-save in C ABI
	ANDQ	$~15, SP
	MOVQ	$_rt0_amd64_lib_go(SB), DI
	MOVQ	$0, SI
	CALL	AX
	MOVQ	BX, SP
	JMP	restore

nocgo:
	ADJSP	$16
	MOVQ	$0x800000, 0(SP)		// stacksize
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)			// fn
	CALL	runtime·newosproc0(SB)
	ADJSP	$-16

restore:
	POP_REGS_HOST_TO_ABI0()
	RET

// _rt0_amd64_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

// argc and argv as received by _rt0_amd64_lib; they are stored there
// and reloaded into DI/SI by _rt0_amd64_lib_go before entering rt0_go.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8

TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#else
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#endif
#ifdef GOOS_windows
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

	// Prevent dead-code elimination of debugCallV2, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET

// mainPC is a function value for runtime.main, to be passed to newproc.
// The reference to runtime.main is made via ABIInternal, since the
// actual function (not the ABI0 wrapper) is needed by newproc.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint executes the single byte 0xcc (the x86 INT3 opcode) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

// mstart is a TOPFRAME stub that calls runtime·mstart0.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	CALL	runtime·mstart0(SB)
	RET // not reached

/*
 *  go-routine
 */

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	JMP	gogo<>(SB)

TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
#ifdef GOEXPERIMENT_regabiargs
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, DX	// DX = fn

	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	MOVQ	BP, (g_sched+gobuf_bp)(R14)

	// switch to m->g0 & its stack, call fn
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14	// if g == m->g0 call badmcall
	JNE	goodm
	JMP	runtime·badmcall(SB)
goodm:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12
	CALL	R12		// fn(g)
	POPQ	AX
	JMP	runtime·badmcall2(SB)
	RET
#else
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	SI, R14	// set the g register
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI
	POPQ	AX
	MOVQ	$runtime·badmcall2(SB), AX
	JMP	AX
	RET
#endif

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3


/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX	// f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	LEAQ	8(SP), AX	// f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

#ifdef GOEXPERIMENT_regabireflect
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
TEXT ·spillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	MOVQ AX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ DI, 24(R12)
	MOVQ SI, 32(R12)
	MOVQ R8, 40(R12)
	MOVQ R9, 48(R12)
	MOVQ R10, 56(R12)
	MOVQ R11, 64(R12)
	MOVQ X0, 72(R12)
	MOVQ X1, 80(R12)
	MOVQ X2, 88(R12)
	MOVQ X3, 96(R12)
	MOVQ X4, 104(R12)
	MOVQ X5, 112(R12)
	MOVQ X6, 120(R12)
	MOVQ X7, 128(R12)
	MOVQ X8, 136(R12)
	MOVQ X9, 144(R12)
	MOVQ X10, 152(R12)
	MOVQ X11, 160(R12)
	MOVQ X12, 168(R12)
	MOVQ X13, 176(R12)
	MOVQ X14, 184(R12)
	RET

// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
TEXT ·unspillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	MOVQ 0(R12), AX
	MOVQ 8(R12), BX
	MOVQ 16(R12), CX
	MOVQ 24(R12), DI
	MOVQ 32(R12), SI
	MOVQ 40(R12), R8
	MOVQ 48(R12), R9
	MOVQ 56(R12), R10
	MOVQ 64(R12), R11
	MOVQ 72(R12), X0
	MOVQ 80(R12), X1
	MOVQ 88(R12), X2
	MOVQ 96(R12), X3
	MOVQ 104(R12), X4
	MOVQ 112(R12), X5
	MOVQ 120(R12), X6
	MOVQ 128(R12), X7
	MOVQ 136(R12), X8
	MOVQ 144(R12), X9
	MOVQ 152(R12), X10
	MOVQ 160(R12), X11
	MOVQ 168(R12), X12
	MOVQ 176(R12), X13
	MOVQ 184(R12), X14
	RET
#else
// spillArgs stores return values from registers to a pointer in R12.
TEXT ·spillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	RET

// unspillArgs loads args into registers from a pointer in R12.
TEXT ·unspillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	RET
#endif

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the value in CX is at most MAXSIZE
// (unsigned compare); otherwise JA 3(PC) skips the MOVQ/JMP pair and
// execution falls through to whatever follows the macro.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX frameSize+32(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// No size class matched: frameSize exceeds the largest callNNN frame.
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	stackArgs+16(FP), SI;		\
	MOVLQZX stackArgsSize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* set up argument registers */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·unspillArgs<ABIInternal>(SB);	\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	MOVQ	(DX), R12;			\
	CALL	R12;				\
	/* copy register return values back */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·spillArgs<ABIInternal>(SB);	\
	MOVLQZX	stackArgsSize+24(FP), CX;		\
	MOVLQZX	stackRetOffset+28(FP), BX;		\
	MOVQ	stackArgs+16(FP), DI;		\
	MOVQ	stackArgsType+0(FP), DX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)
	MOVQ	DI, 8(SP)
	MOVQ	SI, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	R12, 32(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield loads the 32-bit cycles argument into AX and executes
// PAUSE once per iteration, decrementing AX until it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET


TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// func jmpdefer(fv *funcval, argp uintptr)
// argp is a caller SP.
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3.
//    jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
	SUBQ	$5, (SP)	// return to CALL again
	MOVQ	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
// Smashes R9. When GOEXPERIMENT_regabig is not defined, it also
// overwrites R14 with g loaded from TLS.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
#ifndef GOEXPERIMENT_regabig
	get_tls(R14)
	MOVQ	g(R14), R14
#endif
	MOVQ	$runtime·systemstack_switch(SB), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R14)
	LEAQ	8(SP), R9
	MOVQ	R9, (g_sched+gobuf_sp)(R14)
	MOVQ	$0, (g_sched+gobuf_ret)(R14)
	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·abort(SB)
	RET

// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX
	SUBQ	$32, SP
	ANDQ	$~15, SP	// alignment
	MOVQ	DX, 8(SP)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	8(SP), DX
	MOVQ	DX, SP
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	CMPQ	R8, $0
	JEQ	nosave
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave

	// Switch to system stack.
	MOVQ	m_g0(R8), SI
	CALL	gosave_systemstack_switch<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	SUBQ	$64, SP
	ANDQ	$~15, SP
	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 40(SP)	// save original stack pointer
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	40(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET

#ifdef GOOS_windows
// Dummy TLS that's used on Windows so that we don't crash trying
// to restore the G register in needm. needm and its callees are
// very careful never to actually use the G, the TLS just can't be
// unset since we're in Go code.
GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
#endif

// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
	JMP	havem
needm:
#ifdef GOOS_windows
	// Set up a dummy TLS value. needm is careful not to use it,
	// but it needs to be there to prevent autogenerated code from
	// crashing when it loads from it.
	// We don't need to clear it or anything later because needm
	// will set up TLS properly.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif
	// On some platforms (Windows) we cannot call needm through
	// an ABI wrapper because there's no TLS set up, and the ABI
	// wrapper will try to restore the G register (R14) from TLS.
	// Clear X15 because Go expects it and we're not calling
	// through a wrapper, but otherwise avoid setting the G
	// register in the wrapper and call needm directly. It
	// takes no arguments and doesn't return any values so
	// there's no need to handle that. Clear R14 so that there's
	// a bad value in there, in case needm tries to use it.
	XORPS	X15, X15
	XORQ    R14, R14
	MOVQ	$runtime·needm<ABIInternal>(SB), AX
	CALL	AX
	MOVQ	$0, savedm-8(SP) // dropm on return
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->g0->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVQ	fn+0(FP), BX
	MOVQ	frame+8(FP), CX
	MOVQ	ctxt+16(FP), DX
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX   // AX is our actual frame size
	SUBQ	AX, DI   // Allocate the same frame size on the g stack
	MOVQ	DI, SP

	MOVQ	BX, 0(SP)
	MOVQ	CX, 8(SP)
	MOVQ	DX, 16(SP)
	MOVQ	$runtime·cgocallbackg(SB), AX
	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET

// func setg(gg *g)
// set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	MOVQ	DI, R14 // set the g register
	RET

// abort raises INT $3; if execution continues past it, spin forever.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that g->stack.lo < SP < g->stack.hi; abort otherwise.
// Both comparisons are strict (JHI), so SP == stack.lo also aborts.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Issues LFENCE when lfenceBeforeRdtsc is 1 and MFENCE otherwise, then
// returns the RDTSC result combined as DX<<32 + AX.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
#ifdef GOEXPERIMENT_regabiargs
	// AX = ptr to data
	// BX = seed
	// CX = size
#endif
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
#ifndef GOEXPERIMENT_regabiargs
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	s+16(FP), CX	// size
	LEAQ	ret+24(FP), DX
#endif
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)

// func strhash(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
#ifdef GOEXPERIMENT_regabiargs
	// AX = ptr to string struct
	// BX = seed
#endif
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
#ifndef GOEXPERIMENT_regabiargs
	MOVQ	p+0(FP), AX	// ptr to string struct
#endif
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
#ifndef GOEXPERIMENT_regabiargs
	LEAQ	ret+16(FP), DX
#endif
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)

// AX: data
#ifdef
GOEXPERIMENT_regabiargs 1050 // BX: hash seed 1051 #else 1052 // h+8(FP): hash seed 1053 #endif 1054 // CX: length 1055 #ifdef GOEXPERIMENT_regabiargs 1056 // At return: AX = return value 1057 #else 1058 // DX: address to put return value 1059 #endif 1060 TEXT aeshashbody<>(SB),NOSPLIT,$0-0 1061 // Fill an SSE register with our seeds. 1062 #ifdef GOEXPERIMENT_regabiargs 1063 MOVQ BX, X0 // 64 bits of per-table hash seed 1064 #else 1065 MOVQ h+8(FP), X0 // 64 bits of per-table hash seed 1066 #endif 1067 PINSRW $4, CX, X0 // 16 bits of length 1068 PSHUFHW $0, X0, X0 // repeat length 4 times total 1069 MOVO X0, X1 // save unscrambled seed 1070 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed 1071 AESENC X0, X0 // scramble seed 1072 1073 CMPQ CX, $16 1074 JB aes0to15 1075 JE aes16 1076 CMPQ CX, $32 1077 JBE aes17to32 1078 CMPQ CX, $64 1079 JBE aes33to64 1080 CMPQ CX, $128 1081 JBE aes65to128 1082 JMP aes129plus 1083 1084 aes0to15: 1085 TESTQ CX, CX 1086 JE aes0 1087 1088 ADDQ $16, AX 1089 TESTW $0xff0, AX 1090 JE endofpage 1091 1092 // 16 bytes loaded at this address won't cross 1093 // a page boundary, so we can load it directly. 1094 MOVOU -16(AX), X1 1095 ADDQ CX, CX 1096 MOVQ $masks<>(SB), AX 1097 PAND (AX)(CX*8), X1 1098 final1: 1099 PXOR X0, X1 // xor data with seed 1100 AESENC X1, X1 // scramble combo 3 times 1101 AESENC X1, X1 1102 AESENC X1, X1 1103 #ifdef GOEXPERIMENT_regabiargs 1104 MOVQ X1, AX // return X1 1105 #else 1106 MOVQ X1, (DX) 1107 #endif 1108 RET 1109 1110 endofpage: 1111 // address ends in 1111xxxx. Might be up against 1112 // a page boundary, so load ending at last byte. 1113 // Then shift bytes down using pshufb. 
1114 MOVOU -32(AX)(CX*1), X1 1115 ADDQ CX, CX 1116 MOVQ $shifts<>(SB), AX 1117 PSHUFB (AX)(CX*8), X1 1118 JMP final1 1119 1120 aes0: 1121 // Return scrambled input seed 1122 AESENC X0, X0 1123 #ifdef GOEXPERIMENT_regabiargs 1124 MOVQ X0, AX // return X0 1125 #else 1126 MOVQ X0, (DX) 1127 #endif 1128 RET 1129 1130 aes16: 1131 MOVOU (AX), X1 1132 JMP final1 1133 1134 aes17to32: 1135 // make second starting seed 1136 PXOR runtime·aeskeysched+16(SB), X1 1137 AESENC X1, X1 1138 1139 // load data to be hashed 1140 MOVOU (AX), X2 1141 MOVOU -16(AX)(CX*1), X3 1142 1143 // xor with seed 1144 PXOR X0, X2 1145 PXOR X1, X3 1146 1147 // scramble 3 times 1148 AESENC X2, X2 1149 AESENC X3, X3 1150 AESENC X2, X2 1151 AESENC X3, X3 1152 AESENC X2, X2 1153 AESENC X3, X3 1154 1155 // combine results 1156 PXOR X3, X2 1157 #ifdef GOEXPERIMENT_regabiargs 1158 MOVQ X2, AX // return X2 1159 #else 1160 MOVQ X2, (DX) 1161 #endif 1162 RET 1163 1164 aes33to64: 1165 // make 3 more starting seeds 1166 MOVO X1, X2 1167 MOVO X1, X3 1168 PXOR runtime·aeskeysched+16(SB), X1 1169 PXOR runtime·aeskeysched+32(SB), X2 1170 PXOR runtime·aeskeysched+48(SB), X3 1171 AESENC X1, X1 1172 AESENC X2, X2 1173 AESENC X3, X3 1174 1175 MOVOU (AX), X4 1176 MOVOU 16(AX), X5 1177 MOVOU -32(AX)(CX*1), X6 1178 MOVOU -16(AX)(CX*1), X7 1179 1180 PXOR X0, X4 1181 PXOR X1, X5 1182 PXOR X2, X6 1183 PXOR X3, X7 1184 1185 AESENC X4, X4 1186 AESENC X5, X5 1187 AESENC X6, X6 1188 AESENC X7, X7 1189 1190 AESENC X4, X4 1191 AESENC X5, X5 1192 AESENC X6, X6 1193 AESENC X7, X7 1194 1195 AESENC X4, X4 1196 AESENC X5, X5 1197 AESENC X6, X6 1198 AESENC X7, X7 1199 1200 PXOR X6, X4 1201 PXOR X7, X5 1202 PXOR X5, X4 1203 #ifdef GOEXPERIMENT_regabiargs 1204 MOVQ X4, AX // return X4 1205 #else 1206 MOVQ X4, (DX) 1207 #endif 1208 RET 1209 1210 aes65to128: 1211 // make 7 more starting seeds 1212 MOVO X1, X2 1213 MOVO X1, X3 1214 MOVO X1, X4 1215 MOVO X1, X5 1216 MOVO X1, X6 1217 MOVO X1, X7 1218 PXOR runtime·aeskeysched+16(SB), X1 1219 PXOR 
runtime·aeskeysched+32(SB), X2 1220 PXOR runtime·aeskeysched+48(SB), X3 1221 PXOR runtime·aeskeysched+64(SB), X4 1222 PXOR runtime·aeskeysched+80(SB), X5 1223 PXOR runtime·aeskeysched+96(SB), X6 1224 PXOR runtime·aeskeysched+112(SB), X7 1225 AESENC X1, X1 1226 AESENC X2, X2 1227 AESENC X3, X3 1228 AESENC X4, X4 1229 AESENC X5, X5 1230 AESENC X6, X6 1231 AESENC X7, X7 1232 1233 // load data 1234 MOVOU (AX), X8 1235 MOVOU 16(AX), X9 1236 MOVOU 32(AX), X10 1237 MOVOU 48(AX), X11 1238 MOVOU -64(AX)(CX*1), X12 1239 MOVOU -48(AX)(CX*1), X13 1240 MOVOU -32(AX)(CX*1), X14 1241 MOVOU -16(AX)(CX*1), X15 1242 1243 // xor with seed 1244 PXOR X0, X8 1245 PXOR X1, X9 1246 PXOR X2, X10 1247 PXOR X3, X11 1248 PXOR X4, X12 1249 PXOR X5, X13 1250 PXOR X6, X14 1251 PXOR X7, X15 1252 1253 // scramble 3 times 1254 AESENC X8, X8 1255 AESENC X9, X9 1256 AESENC X10, X10 1257 AESENC X11, X11 1258 AESENC X12, X12 1259 AESENC X13, X13 1260 AESENC X14, X14 1261 AESENC X15, X15 1262 1263 AESENC X8, X8 1264 AESENC X9, X9 1265 AESENC X10, X10 1266 AESENC X11, X11 1267 AESENC X12, X12 1268 AESENC X13, X13 1269 AESENC X14, X14 1270 AESENC X15, X15 1271 1272 AESENC X8, X8 1273 AESENC X9, X9 1274 AESENC X10, X10 1275 AESENC X11, X11 1276 AESENC X12, X12 1277 AESENC X13, X13 1278 AESENC X14, X14 1279 AESENC X15, X15 1280 1281 // combine results 1282 PXOR X12, X8 1283 PXOR X13, X9 1284 PXOR X14, X10 1285 PXOR X15, X11 1286 PXOR X10, X8 1287 PXOR X11, X9 1288 PXOR X9, X8 1289 #ifdef GOEXPERIMENT_regabig 1290 // X15 must be zero on return 1291 PXOR X15, X15 1292 #endif 1293 #ifdef GOEXPERIMENT_regabiargs 1294 MOVQ X8, AX // return X8 1295 #else 1296 MOVQ X8, (DX) 1297 #endif 1298 RET 1299 1300 aes129plus: 1301 // make 7 more starting seeds 1302 MOVO X1, X2 1303 MOVO X1, X3 1304 MOVO X1, X4 1305 MOVO X1, X5 1306 MOVO X1, X6 1307 MOVO X1, X7 1308 PXOR runtime·aeskeysched+16(SB), X1 1309 PXOR runtime·aeskeysched+32(SB), X2 1310 PXOR runtime·aeskeysched+48(SB), X3 1311 PXOR runtime·aeskeysched+64(SB), X4 
1312 PXOR runtime·aeskeysched+80(SB), X5 1313 PXOR runtime·aeskeysched+96(SB), X6 1314 PXOR runtime·aeskeysched+112(SB), X7 1315 AESENC X1, X1 1316 AESENC X2, X2 1317 AESENC X3, X3 1318 AESENC X4, X4 1319 AESENC X5, X5 1320 AESENC X6, X6 1321 AESENC X7, X7 1322 1323 // start with last (possibly overlapping) block 1324 MOVOU -128(AX)(CX*1), X8 1325 MOVOU -112(AX)(CX*1), X9 1326 MOVOU -96(AX)(CX*1), X10 1327 MOVOU -80(AX)(CX*1), X11 1328 MOVOU -64(AX)(CX*1), X12 1329 MOVOU -48(AX)(CX*1), X13 1330 MOVOU -32(AX)(CX*1), X14 1331 MOVOU -16(AX)(CX*1), X15 1332 1333 // xor in seed 1334 PXOR X0, X8 1335 PXOR X1, X9 1336 PXOR X2, X10 1337 PXOR X3, X11 1338 PXOR X4, X12 1339 PXOR X5, X13 1340 PXOR X6, X14 1341 PXOR X7, X15 1342 1343 // compute number of remaining 128-byte blocks 1344 DECQ CX 1345 SHRQ $7, CX 1346 1347 aesloop: 1348 // scramble state 1349 AESENC X8, X8 1350 AESENC X9, X9 1351 AESENC X10, X10 1352 AESENC X11, X11 1353 AESENC X12, X12 1354 AESENC X13, X13 1355 AESENC X14, X14 1356 AESENC X15, X15 1357 1358 // scramble state, xor in a block 1359 MOVOU (AX), X0 1360 MOVOU 16(AX), X1 1361 MOVOU 32(AX), X2 1362 MOVOU 48(AX), X3 1363 AESENC X0, X8 1364 AESENC X1, X9 1365 AESENC X2, X10 1366 AESENC X3, X11 1367 MOVOU 64(AX), X4 1368 MOVOU 80(AX), X5 1369 MOVOU 96(AX), X6 1370 MOVOU 112(AX), X7 1371 AESENC X4, X12 1372 AESENC X5, X13 1373 AESENC X6, X14 1374 AESENC X7, X15 1375 1376 ADDQ $128, AX 1377 DECQ CX 1378 JNE aesloop 1379 1380 // 3 more scrambles to finish 1381 AESENC X8, X8 1382 AESENC X9, X9 1383 AESENC X10, X10 1384 AESENC X11, X11 1385 AESENC X12, X12 1386 AESENC X13, X13 1387 AESENC X14, X14 1388 AESENC X15, X15 1389 AESENC X8, X8 1390 AESENC X9, X9 1391 AESENC X10, X10 1392 AESENC X11, X11 1393 AESENC X12, X12 1394 AESENC X13, X13 1395 AESENC X14, X14 1396 AESENC X15, X15 1397 AESENC X8, X8 1398 AESENC X9, X9 1399 AESENC X10, X10 1400 AESENC X11, X11 1401 AESENC X12, X12 1402 AESENC X13, X13 1403 AESENC X14, X14 1404 AESENC X15, X15 1405 1406 PXOR X12, 
X8 1407 PXOR X13, X9 1408 PXOR X14, X10 1409 PXOR X15, X11 1410 PXOR X10, X8 1411 PXOR X11, X9 1412 PXOR X9, X8 1413 #ifdef GOEXPERIMENT_regabig 1414 // X15 must be zero on return 1415 PXOR X15, X15 1416 #endif 1417 #ifdef GOEXPERIMENT_regabiargs 1418 MOVQ X8, AX // return X8 1419 #else 1420 MOVQ X8, (DX) 1421 #endif 1422 RET 1423 1424 // func memhash32(p unsafe.Pointer, h uintptr) uintptr 1425 // ABIInternal for performance. 1426 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24 1427 #ifdef GOEXPERIMENT_regabiargs 1428 // AX = ptr to data 1429 // BX = seed 1430 #endif 1431 CMPB runtime·useAeshash(SB), $0 1432 JEQ noaes 1433 #ifdef GOEXPERIMENT_regabiargs 1434 MOVQ BX, X0 // X0 = seed 1435 #else 1436 MOVQ p+0(FP), AX // ptr to data 1437 MOVQ h+8(FP), X0 // seed 1438 #endif 1439 PINSRD $2, (AX), X0 // data 1440 AESENC runtime·aeskeysched+0(SB), X0 1441 AESENC runtime·aeskeysched+16(SB), X0 1442 AESENC runtime·aeskeysched+32(SB), X0 1443 #ifdef GOEXPERIMENT_regabiargs 1444 MOVQ X0, AX // return X0 1445 #else 1446 MOVQ X0, ret+16(FP) 1447 #endif 1448 RET 1449 noaes: 1450 JMP runtime·memhash32Fallback<ABIInternal>(SB) 1451 1452 // func memhash64(p unsafe.Pointer, h uintptr) uintptr 1453 // ABIInternal for performance. 
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// Hashes the 8 bytes at p with three AESENC rounds keyed from
	// runtime·aeskeysched; uses runtime·memhash64Fallback when
	// runtime·useAeshash is zero.
#ifdef GOEXPERIMENT_regabiargs
	// AX = ptr to data
	// BX = seed
#else
#endif
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	BX, X0	// X0 = seed
#else
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
#endif
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	X0, AX	// return X0
#else
	MOVQ	X0, ret+16(FP)
#endif
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)

// simple mask to get rid of data in the high part of the register.
// The table holds 16 entries of 16 bytes each; entry i (at byte offset
// 16*i) has its low i bytes set to 0xff and the rest zero. aeshashbody
// indexes it by input length for lengths 1 through 15.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// func checkASM() bool
// Reports whether both lookup tables are 16-byte aligned. The two
// addresses are OR-ed together so a single test of the low four bits
// covers both.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Like masks<>, the table holds 16 entries of 16 bytes each. The 0xff
// selector bytes have their high bit set, which makes PSHUFB write zero
// to that destination byte.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	// Walk g -> m -> curg -> stack.hi, leaving the result in AX.
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// This is called from .init_array and follows the platform, not Go, ABI.
// It appends the moduledata pointed to by DI to the list: the current
// runtime·lastmoduledatap gets DI as its next field, then DI becomes
// the new runtime·lastmoduledatap.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET

// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
TEXT ·sigpanic0<ABIInternal>(SB),NOSPLIT,$0-0
#ifdef GOEXPERIMENT_regabig
	// Reload g from TLS into R14 and zero X15 before entering Go code.
	get_tls(R14)
	MOVQ	g(R14), R14
	XORPS	X15, X15
#endif
	JMP	·sigpanic<ABIInternal>(SB)

// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Defined as ABIInternal since it does not use the stack-based Go ABI.
//
// Frame layout ($112): offsets 0 through 88 hold the general-purpose
// registers spilled on the flush path; offsets 96 and 104 hold R12 and
// R13, which are always saved.
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
#ifdef GOEXPERIMENT_regabig
	MOVQ	g_m(R14), R13
#else
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
#endif
	MOVQ	m_p(R13), R13
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12
	// Increment wbBuf.next position.
	// (Each record is 16 bytes: the new value followed by the old *slot.)
	LEAQ	16(R12), R12
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// The flags set here are consumed by the JEQ below; the MOVQs in
	// between do not modify flags.
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	// Record the write.
	MOVQ	AX, -16(R12)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R12)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	// Reload everything spilled above, then rejoin the fast path.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	JMP	ret

// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
// Defined as ABIInternal since it does not use the stable Go ABI.
// The value register is swapped with AX around the call, so both AX and
// CX hold their original contents on return.
TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ	CX, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	CX, AX
	RET

// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
	// Swap the value into AX for the call, then swap it back.
	XCHGQ	DX, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	DX, AX
	RET

// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ	BX, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	BX, AX
	RET

// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ	BP, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	BP, AX
	RET

// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ	SI, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	SI, AX
	RET

// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ	R8, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	R8, AX
	RET

// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ	R9, AX
	CALL	runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ	R9, AX
	RET

// Error string reported by debugCallV2 when the requested argument
// frame exceeds the largest debugCall<N> size.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below

// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// R12 values used below: 0 = frame ready for the call, 1 = call
// returned, 2 = call panicked, 8 = call cannot be injected (reason
// string at the top of the stack), 16 = restore registers and resume.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	// debugCallCheck takes the caller's PC at 0(SP) and leaves its
	// result string (pointer, length) at 8(SP) and 16(SP); a zero
	// pointer means the check passed.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	//
	// Each DEBUG_CALL_DISPATCH expansion is five instructions long;
	// the JA 5(PC) inside it skips to the next expansion when the
	// frame size in AX exceeds MAXSIZE.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP)	// length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET

// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each generated function reserves MAXSIZE bytes of frame, traps with
// R12 = 0 so the debugger can start the call, then traps again with
// R12 = 1 once the call has returned.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, R12;			\
	BYTE	$0xcc;				\
	MOVQ	$1, R12;			\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
// Reports a panic from an injected call to the debugger: the interface
// value is placed at the top of the stack and INT3 is raised with
// R12 = 2.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, R12
	BYTE	$0xcc
	RET

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// Defined as ABIInternal since they do not use the stack-based Go ABI.
//
// Under GOEXPERIMENT_regabiargs the stubs instead move the two values
// into AX and BX where they are not already there. Incoming register
// pairs, as read by the code below:
//   AX, CX: panicIndex*, panicSliceB*, panicSlice3C*
//   CX, DX: panicSliceAlen*, panicSliceAcap*, panicSlice3B*
//   DX, BX: panicSlice3Alen*, panicSlice3Acap*, panicSliceConvert
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, BX
#else
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
#endif
	JMP	runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, BX
#else
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
#endif
	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, AX
	MOVQ	DX, BX
#else
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, AX
	MOVQ	DX, BX
#else
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, AX
	MOVQ	DX, BX
#else
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, AX
	MOVQ	DX, BX
#else
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, BX
#else
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, BX
#else
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	DX, AX
#else
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	DX, AX
#else
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	DX, AX
#else
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	DX, AX
#else
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, AX
	MOVQ	DX, BX
#else
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, AX
	MOVQ	DX, BX
#else
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, BX
#else
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	CX, BX
#else
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
#endif
	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVQ	DX, AX
#else
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
#endif
	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)

#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif

// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// RETPOLINE(reg) emits the sequence as raw bytes. reg is the hardware
// register number: bit 3 selects the REX extension bit and the low
// three bits go into the ModRM reg field, so the store at "setup" writes
// the chosen register (not only AX) to 0(SP) before the RET.
#define RETPOLINE(reg) \
	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
	/* nospec: */									\
	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
	/* setup: */									\
	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
	/*   RET */             BYTE $0xC3

TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)