// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Source: github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/asm_amd64.s

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
//
// It loads argc/argv into DI/SI, the registers rt0_go reads them from,
// and tail-jumps to rt0_go.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)

// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
// Since DI and SI are already what rt0_go expects, this is a bare jump.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)

// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
//
// It records argc/argv in file-local globals, runs libpreinit
// synchronously, and then starts _rt0_amd64_lib_go on a new thread,
// either through _cgo_sys_thread_create (when that pointer is non-nil)
// or through runtime·newosproc0.
TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()

	// Stash argc/argv so the new thread can pick them up in
	// _rt0_amd64_lib_go.
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Create a new thread to finish Go runtime initialization.
	// A nil _cgo_sys_thread_create selects the nocgo path below.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JZ	nocgo

	// We're calling back to C.
	// Align stack per ELF ABI requirements.
	MOVQ	SP, BX  // Callee-save in C ABI
	ANDQ	$~15, SP
	MOVQ	$_rt0_amd64_lib_go(SB), DI	// first C argument: thread entry point
	MOVQ	$0, SI				// second C argument: zero
	CALL	AX
	MOVQ	BX, SP	// undo the alignment using the value kept in BX
	JMP	restore

nocgo:
	// Build a two-word argument frame (stacksize, fn) for newosproc0.
	ADJSP	$16
	MOVQ	$0x800000, 0(SP)		// stacksize
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)			// fn
	CALL	runtime·newosproc0(SB)
	ADJSP	$-16

restore:
	// Undo PUSH_REGS_HOST_TO_ABI0 before returning to the C caller.
	POP_REGS_HOST_TO_ABI0()
	RET

// _rt0_amd64_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
// It reloads the argc/argv saved by _rt0_amd64_lib and jumps to rt0_go.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

// File-local storage for the argc/argv handed to _rt0_amd64_lib.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8

// bad_cpu_msg is the 84-byte message for the GOAMD64 level this binary
// was built for. Each variant below has the same length, which must
// match both the GLOBL size and the length used on rt0_go's bad_cpu path.
#ifdef GOAMD64_v2
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v3
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v4
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
#endif

GLOBL bad_cpu_msg<>(SB), RODATA, $84

// Define a list of AMD64 microarchitecture level features
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
//
// The macro suffix names the register the mask is compared against in
// rt0_go: *_FEATURES_CX is CX after CPUID with AX=1, *_EXT_FEATURES_CX is
// CX after CPUID with AX=0x80000001, *_EXT_FEATURES_BX is BX after CPUID
// with AX=7/CX=0, and *_OS_SUPPORT_AX is AX after XGETBV with CX=0.

//                     SSE3     SSSE3    CMPXCHG16B SSE4.1    SSE4.2    POPCNT
#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13  | 1 << 19 | 1 << 20 | 1 << 23)
//                         LAHF/SAHF
#define V2_EXT_FEATURES_CX (1 << 0)
//                                      FMA       MOVBE     OSXSAVE   AVX       F16C
#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
//                                              ABM (FOR LZCNT)
#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
//                         BMI1     AVX2     BMI2
#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
//                       XMM      YMM
#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)

// The v4 level requires no CPUID feature bits in CX beyond those of v3
// (for either the AX=1 or the AX=0x80000001 query); it only adds BX bits
// and OS-support bits.
#define V4_FEATURES_CX V3_FEATURES_CX

#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
//                                              AVX512F   AVX512DQ  AVX512CD  AVX512BW  AVX512VL
#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
//                                          OPMASK   ZMM
#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))

// Select the NEED_* masks for the configured GOAMD64 level. rt0_go only
// assembles a given check when the corresponding NEED_* macro is defined,
// so a build with none of GOAMD64_v2/v3/v4 defined performs no checks.
#ifdef GOAMD64_v2
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V2_FEATURES_CX
#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
#endif

#ifdef GOAMD64_v3
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V3_FEATURES_CX
#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
#endif

#ifdef GOAMD64_v4
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V4_FEATURES_CX
#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX

// Darwin requires a different approach to check AVX512 support, see CL 285572.
// On Darwin only the v3 XGETBV bits are required; the AVX-512 capability
// bits are instead read from the commpage (NEED_DARWIN_SUPPORT).
#ifdef GOOS_darwin
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
// These values are from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address         0x00007fffffe00000
#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
#define commpage64_version              (commpage64_base_address+0x01E)
#define AVX512F                         0x0000004000000000
#define AVX512CD                        0x0000008000000000
#define AVX512DQ                        0x0000010000000000
#define AVX512BW                        0x0000020000000000
#define AVX512VL                        0x0000100000000000
#define NEED_DARWIN_SUPPORT             (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
#else
#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
#endif

#endif

// rt0_go is the common runtime bootstrap reached from every entry point
// in this file. On entry DI holds argc and SI holds argv. In order, it:
//   - carves an aligned frame on the OS stack and seeds g0's stack bounds,
//   - records basic CPUID information,
//   - calls _cgo_init if present, otherwise sets up and sanity-checks TLS,
//   - links g0 and m0 together,
//   - verifies the GOAMD64 feature requirements (jumping to bad_cpu on failure),
//   - runs check, args, osinit and schedinit,
//   - queues runtime.main via newproc and starts this M with mstart.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(5*8), SP		// 3args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 24(SP)
	MOVQ	BX, 32(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	// The provisional lower bound is 64 KiB below the current SP.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// The vendor string is returned in BX, DX, CX (in that order).
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)

notintel:
	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is a _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#endif
#ifdef GOOS_windows
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// Adjust for the Win64 calling convention.
	// Order matters: each move reads a register that a later move overwrites.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	// (recomputed from g0's stack.lo, which _cgo_init may have changed)
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const_stackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

	// With cgo, every OS except Windows skips the runtime's own TLS setup.
#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

#ifdef GOOS_windows
	CALL	runtime·wintls(SB)
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	// (write 0x123 via the TLS g slot, then read it back from m0.tls)
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared

	// Check GOAMD64 requirements
	// We need to do this after setting up TLS, so that
	// we can report an error if there is a failure. See issue 49586.
#ifdef NEED_FEATURES_CX
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	bad_cpu
	MOVL	$1, AX
	CPUID
	// Every required bit must be set: mask, then compare against the mask.
	ANDL	$NEED_FEATURES_CX, CX
	CMPL	CX, $NEED_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_MAX_CPUID
	// AX=0x80000000 reports the highest extended CPUID function in AX.
	MOVL	$0x80000000, AX
	CPUID
	CMPL	AX, $NEED_MAX_CPUID
	JL	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_BX
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	ANDL	$NEED_EXT_FEATURES_BX, BX
	CMPL	BX, $NEED_EXT_FEATURES_BX
	JNE	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_CX
	MOVL	$0x80000001, AX
	CPUID
	ANDL	$NEED_EXT_FEATURES_CX, CX
	CMPL	CX, $NEED_EXT_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_OS_SUPPORT_AX
	// XGETBV with CX=0; the low half of the result is checked in AX.
	XORL    CX, CX
	XGETBV
	ANDL	$NEED_OS_SUPPORT_AX, AX
	CMPL	AX, $NEED_OS_SUPPORT_AX
	JNE	bad_cpu
#endif

#ifdef NEED_DARWIN_SUPPORT
	MOVQ	$commpage64_version, BX
	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
	JL	bad_cpu
	MOVQ	$commpage64_cpu_capabilities64, BX
	MOVQ	(BX), BX
	MOVQ	$NEED_DARWIN_SUPPORT, CX
	ANDQ	CX, BX
	CMPQ	BX, CX
	JNE	bad_cpu
#endif

	CALL	runtime·check(SB)

	// Pass the argc/argv saved at 24(SP)/32(SP) on entry to runtime·args.
	// argc is copied as a 32-bit value.
	MOVL	24(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	32(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	CALL	runtime·newproc(SB)
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

bad_cpu: // show that the program requires a certain microarchitecture level.
	// write(2, bad_cpu_msg, 84), then exit(1). The abort only runs if
	// exit returns.
	MOVQ	$2, 0(SP)
	MOVQ	$bad_cpu_msg<>(SB), AX
	MOVQ	AX, 8(SP)
	MOVQ	$84, 16(SP)
	CALL	runtime·write(SB)
	MOVQ	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET

	// Prevent dead-code elimination of debugCallV2, which is
	// intended to be called by debuggers.
	// (This follows a RET, so it is never executed; it exists only as a
	// reference to the symbol.)
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET

// mainPC is a function value for runtime.main, to be passed to newproc.
// The reference to runtime.main is made via ABIInternal, since the
// actual function (not the ABI0 wrapper) is needed by newproc.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint emits the single byte 0xCC (the x86 INT3 opcode) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

// mstart is the assembly entry point for starting an M; it simply calls
// mstart0, which is not expected to return.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
	CALL	runtime·mstart0(SB)
	RET // not reached

/*
 *  go-routine
 */

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	JMP	gogo<>(SB)

// gogo<> is the body of gogo. It expects BX = gobuf and DX = gobuf.g.
// It installs g in both TLS and R14, reloads SP, AX (ret), DX (ctxt) and
// BP from the gobuf, zeroes those gobuf fields, and jumps to gobuf.pc.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX	// BX is reused for the target PC; the gobuf pointer is no longer needed
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, DX	// DX = fn

	// Save state in g->sched. The caller's SP and PC are restored by gogo to
	// resume execution in the caller's frame (implicit return). The caller's BP
	// is also restored to support frame pointer unwinding.
	MOVQ	SP, BX	// hide (SP) reads from vet
	MOVQ	8(BX), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
	// can cause a frame pointer cycle, see CL 476235.
	MOVQ	(BP), BX // caller's BP
	MOVQ	BX, (g_sched+gobuf_bp)(R14)

	// switch to m->g0 & its stack, call fn
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14	// if g == m->g0 call badmcall
	JNE	goodm
	JMP	runtime·badmcall(SB)
goodm:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12	// R12 = code pointer from the func value; DX stays as the closure context
	CALL	R12		// fn(g)
	// fn returned, which it must never do.
	POPQ	AX
	JMP	runtime·badmcall2(SB)
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
// The frame layout needs to match systemstack
// so that it can pretend to be systemstack_switch.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	// Make sure this function is not leaf,
	// so the frame is saved.
	CALL	runtime·abort(SB)
	RET

// func systemstack(fn func())
//
// Runs fn on the g0 stack. If the current g is already gsignal or g0, fn
// is tail-called on the current stack (noswitch). If the current g is
// m.curg, the stack is switched to g0, fn is called, and the stack is
// switched back. Any other g is a fatal error (bad).
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// Switch stacks.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), SP

	// call target function
	// (DX = func value as closure context, DI = its code pointer)
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g
	// Only the TLS g slot is rewritten here; R14 is not touched by this
	// sequence.
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	(g_sched+gobuf_bp)(AX), BP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	MOVQ	$0, (g_sched+gobuf_bp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	// The function epilogue is not called on a tail call.
	// Pop BP from the stack to simulate it.
	POPQ	BP
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3

// func switchToCrashStack0(fn func())
//
// Makes runtime·gcrash the current g (and the current m's g0), moves SP to
// just below the top of gcrash's stack, and calls fn there. fn must not
// return.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	g_m(R14), BX // curm

	// set g to gcrash
	LEAQ	runtime·gcrash(SB), R14 // g = &gcrash
	MOVQ	BX, g_m(R14)            // g.m = curm
	MOVQ	R14, m_g0(BX)           // curm.g0 = g
	get_tls(CX)
	MOVQ	R14, g(CX)

	// switch to crashstack
	// (leave four words of headroom below stack.hi)
	MOVQ	(g_stack+stack_hi)(R14), BX
	SUBQ	$(4*8), BX
	MOVQ	BX, SP

	// call target function
	MOVQ	AX, DX
	MOVQ	0(AX), AX
	CALL	AX

	// should never return
	CALL	runtime·abort(SB)
	UNDEF

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// In the comments below, "f" is the function whose prologue called
// morestack. On entry DX holds the context value that is saved into
// g.sched.ctxt.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), DI     // DI = g
	MOVQ	g_m(DI), BX   // BX = m

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(DI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(DI)
	MOVQ	BP, (g_sched+gobuf_bp)(DI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)

	MOVQ	m_g0(BX), SI  // SI = m.g0
	CMPQ	DI, SI
	JNE	3(PC)	// skip the two CALLs below when g != g0
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	DI, SI
	JNE	3(PC)	// skip the two CALLs below when g != gsignal
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	DI, (m_morebuf+gobuf_g)(BX)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	MOVQ	(g_sched+gobuf_bp)(BX), BP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
// It zeroes DX (the context value morestack saves) before jumping there.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// Nine integer registers are stored at offsets 0..64 and X0..X14 (low 64
// bits each) at offsets 72..184; the layout mirrors unspillArgs.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	MOVQ AX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ DI, 24(R12)
	MOVQ SI, 32(R12)
	MOVQ R8, 40(R12)
	MOVQ R9, 48(R12)
	MOVQ R10, 56(R12)
	MOVQ R11, 64(R12)
	MOVQ X0, 72(R12)
	MOVQ X1, 80(R12)
	MOVQ X2, 88(R12)
	MOVQ X3, 96(R12)
	MOVQ X4, 104(R12)
	MOVQ X5, 112(R12)
	MOVQ X6, 120(R12)
	MOVQ X7, 128(R12)
	MOVQ X8, 136(R12)
	MOVQ X9, 144(R12)
	MOVQ X10, 152(R12)
	MOVQ X11, 160(R12)
	MOVQ X12, 168(R12)
	MOVQ X13, 176(R12)
	MOVQ X14, 184(R12)
	RET

// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	// Integer registers: nine 8-byte slots at offsets 0..64.
	MOVQ 0(R12), AX
	MOVQ 8(R12), BX
	MOVQ 16(R12), CX
	MOVQ 24(R12), DI
	MOVQ 32(R12), SI
	MOVQ 40(R12), R8
	MOVQ 48(R12), R9
	MOVQ 56(R12), R10
	MOVQ 64(R12), R11
	// Floating-point registers X0..X14: fifteen 8-byte slots at offsets 72..184.
	MOVQ 72(R12), X0
	MOVQ 80(R12), X1
	MOVQ 88(R12), X2
	MOVQ 96(R12), X3
	MOVQ 104(R12), X4
	MOVQ 112(R12), X5
	MOVQ 120(R12), X6
	MOVQ 128(R12), X7
	MOVQ 136(R12), X8
	MOVQ 144(R12), X9
	MOVQ 152(R12), X10
	MOVQ 160(R12), X11
	MOVQ 168(R12), X12
	MOVQ 176(R12), X13
	MOVQ 184(R12), X14
	RET

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the frame size in CX is at most MAXSIZE.
// Otherwise "JA 3(PC)" skips the two-instruction indirect jump and
// execution falls through to whatever follows the macro.
// Clobbers AX when the jump is taken.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflectcall loads frameSize into CX and dispatches to the smallest
// runtime·callN whose fixed frame (a power of two from 16 bytes to 1 GiB)
// can hold it. A frameSize larger than every bucket ends up in
// runtime·badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX frameSize+32(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call function NAME with a MAXSIZE-byte
// frame and the reflectcall argument layout. Each instance:
//   1. copies stackArgsSize bytes from stackArgs to the bottom of its frame,
//   2. loads the register arguments from regArgs via unspillArgs,
//   3. calls the code pointer stored in f (with DX = f),
//   4. stores register results back into regArgs via spillArgs,
//   5. hands the stack results (the bytes from stackRetOffset onward) to
//      callRet, which copies them back to the caller's stackArgs buffer.
#define CALLFN(NAME,MAXSIZE) \
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	stackArgs+16(FP), SI;		\
	MOVLQZX stackArgsSize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* set up argument registers */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·unspillArgs(SB);		\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	MOVQ	(DX), R12;			\
	CALL	R12;				\
	/* copy register return values back */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·spillArgs(SB);			\
	MOVLQZX	stackArgsSize+24(FP), CX;		\
	MOVLQZX	stackRetOffset+28(FP), BX;		\
	MOVQ	stackArgs+16(FP), DI;		\
	MOVQ	stackArgsType+0(FP), DX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// Register inputs, as set up by CALLFN: DX = stackArgsType,
// DI = destination (stackArgs+stackRetOffset), SI = source (frame copy at
// the same offset), CX = byte count, R12 = regArgs. They are stored in
// that order as the five stack arguments to reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)
	MOVQ	DI, 8(SP)
	MOVQ	SI, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	R12, 32(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// One CALLFN instantiation per DISPATCH bucket used by reflectcall.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield spins by executing PAUSE once per iteration, decrementing the
// 32-bit count loaded from cycles until it reaches zero.
// NOTE(review): the count is decremented before it is tested, so a count
// of 0 would wrap around instead of returning immediately — callers
// presumably always pass a positive value; confirm.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET


TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with frame pointer
// and without locals ($0) or else unwinding from
// systemstack_switch is incorrect.
// Smashes R9.
818 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0 819 // Take systemstack_switch PC and add 8 bytes to skip 820 // the prologue. The final location does not matter 821 // as long as we are between the prologue and the epilogue. 822 MOVQ $runtime·systemstack_switch+8(SB), R9 823 MOVQ R9, (g_sched+gobuf_pc)(R14) 824 LEAQ 8(SP), R9 825 MOVQ R9, (g_sched+gobuf_sp)(R14) 826 MOVQ $0, (g_sched+gobuf_ret)(R14) 827 MOVQ BP, (g_sched+gobuf_bp)(R14) 828 // Assert ctxt is zero. See func save. 829 MOVQ (g_sched+gobuf_ctxt)(R14), R9 830 TESTQ R9, R9 831 JZ 2(PC) 832 CALL runtime·abort(SB) 833 RET 834 835 // func asmcgocall_no_g(fn, arg unsafe.Pointer) 836 // Call fn(arg) aligned appropriately for the gcc ABI. 837 // Called on a system stack, and there may be no g yet (during needm). 838 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16 839 MOVQ fn+0(FP), AX 840 MOVQ arg+8(FP), BX 841 MOVQ SP, DX 842 ANDQ $~15, SP // alignment 843 MOVQ DX, 8(SP) 844 MOVQ BX, DI // DI = first argument in AMD64 ABI 845 MOVQ BX, CX // CX = first argument in Win64 846 CALL AX 847 MOVQ 8(SP), DX 848 MOVQ DX, SP 849 RET 850 851 // asmcgocall_landingpad calls AX with BX as argument. 852 // Must be called on the system stack. 853 TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0 854 #ifdef GOOS_windows 855 // Make sure we have enough room for 4 stack-backed fast-call 856 // registers as per Windows amd64 calling convention. 857 ADJSP $32 858 // On Windows, asmcgocall_landingpad acts as landing pad for exceptions 859 // thrown in the cgo call. Exceptions that reach this function will be 860 // handled by runtime.sehtramp thanks to the SEH metadata added 861 // by the compiler. 862 // Note that runtime.sehtramp can't be attached directly to asmcgocall 863 // because its initial stack pointer can be outside the system stack bounds, 864 // and Windows stops the stack unwinding without calling the exception handler 865 // when it reaches that point. 
866 MOVQ BX, CX // CX = first argument in Win64 867 CALL AX 868 // The exception handler is not called if the next instruction is part of 869 // the epilogue, which includes the RET instruction, so we need to add a NOP here. 870 BYTE $0x90 871 ADJSP $-32 872 RET 873 #endif 874 // Tail call AX on non-Windows, as the extra stack frame is not needed. 875 MOVQ BX, DI // DI = first argument in AMD64 ABI 876 JMP AX 877 878 // func asmcgocall(fn, arg unsafe.Pointer) int32 879 // Call fn(arg) on the scheduler stack, 880 // aligned appropriately for the gcc ABI. 881 // See cgocall.go for more details. 882 TEXT ·asmcgocall(SB),NOSPLIT,$0-20 883 MOVQ fn+0(FP), AX 884 MOVQ arg+8(FP), BX 885 886 MOVQ SP, DX 887 888 // Figure out if we need to switch to m->g0 stack. 889 // We get called to create new OS threads too, and those 890 // come in on the m->g0 stack already. Or we might already 891 // be on the m->gsignal stack. 892 get_tls(CX) 893 MOVQ g(CX), DI 894 CMPQ DI, $0 895 JEQ nosave 896 MOVQ g_m(DI), R8 897 MOVQ m_gsignal(R8), SI 898 CMPQ DI, SI 899 JEQ nosave 900 MOVQ m_g0(R8), SI 901 CMPQ DI, SI 902 JEQ nosave 903 904 // Switch to system stack. 905 // The original frame pointer is stored in BP, 906 // which is useful for stack unwinding. 907 CALL gosave_systemstack_switch<>(SB) 908 MOVQ SI, g(CX) 909 MOVQ (g_sched+gobuf_sp)(SI), SP 910 911 // Now on a scheduling stack (a pthread-created stack). 912 SUBQ $16, SP 913 ANDQ $~15, SP // alignment for gcc ABI 914 MOVQ DI, 8(SP) // save g 915 MOVQ (g_stack+stack_hi)(DI), DI 916 SUBQ DX, DI 917 MOVQ DI, 0(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback) 918 CALL runtime·asmcgocall_landingpad(SB) 919 920 // Restore registers, g, stack pointer. 921 get_tls(CX) 922 MOVQ 8(SP), DI 923 MOVQ (g_stack+stack_hi)(DI), SI 924 SUBQ 0(SP), SI 925 MOVQ DI, g(CX) 926 MOVQ SI, SP 927 928 MOVL AX, ret+16(FP) 929 RET 930 931 nosave: 932 // Running on a system stack, perhaps even without a g. 
933 // Having no g can happen during thread creation or thread teardown 934 // (see needm/dropm on Solaris, for example). 935 // This code is like the above sequence but without saving/restoring g 936 // and without worrying about the stack moving out from under us 937 // (because we're on a system stack, not a goroutine stack). 938 // The above code could be used directly if already on a system stack, 939 // but then the only path through this code would be a rare case on Solaris. 940 // Using this code for all "already on system stack" calls exercises it more, 941 // which should help keep it correct. 942 SUBQ $16, SP 943 ANDQ $~15, SP 944 MOVQ $0, 8(SP) // where above code stores g, in case someone looks during debugging 945 MOVQ DX, 0(SP) // save original stack pointer 946 CALL runtime·asmcgocall_landingpad(SB) 947 MOVQ 0(SP), SI // restore original stack pointer 948 MOVQ SI, SP 949 MOVL AX, ret+16(FP) 950 RET 951 952 #ifdef GOOS_windows 953 // Dummy TLS that's used on Windows so that we don't crash trying 954 // to restore the G register in needm. needm and its callees are 955 // very careful never to actually use the G, the TLS just can't be 956 // unset since we're in Go code. 957 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize 958 #endif 959 960 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) 961 // See cgocall.go for more details. 962 TEXT ·cgocallback(SB),NOSPLIT,$24-24 963 NO_LOCAL_POINTERS 964 965 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g. 966 // It is used to dropm while thread is exiting. 967 MOVQ fn+0(FP), AX 968 CMPQ AX, $0 969 JNE loadg 970 // Restore the g from frame. 971 get_tls(CX) 972 MOVQ frame+8(FP), BX 973 MOVQ BX, g(CX) 974 JMP dropm 975 976 loadg: 977 // If g is nil, Go did not create the current thread, 978 // or if this thread never called into Go on pthread platforms. 979 // Call needm to obtain one m for temporary use. 
980 // In this case, we're running on the thread stack, so there's 981 // lots of space, but the linker doesn't know. Hide the call from 982 // the linker analysis by using an indirect call through AX. 983 get_tls(CX) 984 #ifdef GOOS_windows 985 MOVL $0, BX 986 CMPQ CX, $0 987 JEQ 2(PC) 988 #endif 989 MOVQ g(CX), BX 990 CMPQ BX, $0 991 JEQ needm 992 MOVQ g_m(BX), BX 993 MOVQ BX, savedm-8(SP) // saved copy of oldm 994 JMP havem 995 needm: 996 #ifdef GOOS_windows 997 // Set up a dummy TLS value. needm is careful not to use it, 998 // but it needs to be there to prevent autogenerated code from 999 // crashing when it loads from it. 1000 // We don't need to clear it or anything later because needm 1001 // will set up TLS properly. 1002 MOVQ $zeroTLS<>(SB), DI 1003 CALL runtime·settls(SB) 1004 #endif 1005 // On some platforms (Windows) we cannot call needm through 1006 // an ABI wrapper because there's no TLS set up, and the ABI 1007 // wrapper will try to restore the G register (R14) from TLS. 1008 // Clear X15 because Go expects it and we're not calling 1009 // through a wrapper, but otherwise avoid setting the G 1010 // register in the wrapper and call needm directly. It 1011 // takes no arguments and doesn't return any values so 1012 // there's no need to handle that. Clear R14 so that there's 1013 // a bad value in there, in case needm tries to use it. 1014 XORPS X15, X15 1015 XORQ R14, R14 1016 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX 1017 CALL AX 1018 MOVQ $0, savedm-8(SP) 1019 get_tls(CX) 1020 MOVQ g(CX), BX 1021 MOVQ g_m(BX), BX 1022 1023 // Set m->sched.sp = SP, so that if a panic happens 1024 // during the function we are about to execute, it will 1025 // have a valid SP to run on the g0 stack. 1026 // The next few lines (after the havem label) 1027 // will save this SP onto the stack and then write 1028 // the same SP back to m->sched.sp. 
That seems redundant, 1029 // but if an unrecovered panic happens, unwindm will 1030 // restore the g->sched.sp from the stack location 1031 // and then systemstack will try to use it. If we don't set it here, 1032 // that restored SP will be uninitialized (typically 0) and 1033 // will not be usable. 1034 MOVQ m_g0(BX), SI 1035 MOVQ SP, (g_sched+gobuf_sp)(SI) 1036 1037 havem: 1038 // Now there's a valid m, and we're running on its m->g0. 1039 // Save current m->g0->sched.sp on stack and then set it to SP. 1040 // Save current sp in m->g0->sched.sp in preparation for 1041 // switch back to m->curg stack. 1042 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP). 1043 MOVQ m_g0(BX), SI 1044 MOVQ (g_sched+gobuf_sp)(SI), AX 1045 MOVQ AX, 0(SP) 1046 MOVQ SP, (g_sched+gobuf_sp)(SI) 1047 1048 // Switch to m->curg stack and call runtime.cgocallbackg. 1049 // Because we are taking over the execution of m->curg 1050 // but *not* resuming what had been running, we need to 1051 // save that information (m->curg->sched) so we can restore it. 1052 // We can restore m->curg->sched.sp easily, because calling 1053 // runtime.cgocallbackg leaves SP unchanged upon return. 1054 // To save m->curg->sched.pc, we push it onto the curg stack and 1055 // open a frame the same size as cgocallback's g0 frame. 1056 // Once we switch to the curg stack, the pushed PC will appear 1057 // to be the return PC of cgocallback, so that the traceback 1058 // will seamlessly trace back into the earlier calls. 1059 MOVQ m_curg(BX), SI 1060 MOVQ SI, g(CX) 1061 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI 1062 MOVQ (g_sched+gobuf_pc)(SI), BX 1063 MOVQ BX, -8(DI) // "push" return PC on the g stack 1064 // Gather our arguments into registers. 
1065 MOVQ fn+0(FP), BX 1066 MOVQ frame+8(FP), CX 1067 MOVQ ctxt+16(FP), DX 1068 // Compute the size of the frame, including return PC and, if 1069 // GOEXPERIMENT=framepointer, the saved base pointer 1070 LEAQ fn+0(FP), AX 1071 SUBQ SP, AX // AX is our actual frame size 1072 SUBQ AX, DI // Allocate the same frame size on the g stack 1073 MOVQ DI, SP 1074 1075 MOVQ BX, 0(SP) 1076 MOVQ CX, 8(SP) 1077 MOVQ DX, 16(SP) 1078 MOVQ $runtime·cgocallbackg(SB), AX 1079 CALL AX // indirect call to bypass nosplit check. We're on a different stack now. 1080 1081 // Compute the size of the frame again. FP and SP have 1082 // completely different values here than they did above, 1083 // but only their difference matters. 1084 LEAQ fn+0(FP), AX 1085 SUBQ SP, AX 1086 1087 // Restore g->sched (== m->curg->sched) from saved values. 1088 get_tls(CX) 1089 MOVQ g(CX), SI 1090 MOVQ SP, DI 1091 ADDQ AX, DI 1092 MOVQ -8(DI), BX 1093 MOVQ BX, (g_sched+gobuf_pc)(SI) 1094 MOVQ DI, (g_sched+gobuf_sp)(SI) 1095 1096 // Switch back to m->g0's stack and restore m->g0->sched.sp. 1097 // (Unlike m->curg, the g0 goroutine never uses sched.pc, 1098 // so we do not have to restore it.) 1099 MOVQ g(CX), BX 1100 MOVQ g_m(BX), BX 1101 MOVQ m_g0(BX), SI 1102 MOVQ SI, g(CX) 1103 MOVQ (g_sched+gobuf_sp)(SI), SP 1104 MOVQ 0(SP), AX 1105 MOVQ AX, (g_sched+gobuf_sp)(SI) 1106 1107 // If the m on entry was nil, we called needm above to borrow an m, 1108 // 1. for the duration of the call on non-pthread platforms, 1109 // 2. or the duration of the C thread alive on pthread platforms. 1110 // If the m on entry wasn't nil, 1111 // 1. the thread might be a Go thread, 1112 // 2. or it wasn't the first call from a C thread on pthread platforms, 1113 // since then we skip dropm to reuse the m in the first call. 1114 MOVQ savedm-8(SP), BX 1115 CMPQ BX, $0 1116 JNE done 1117 1118 // Skip dropm to reuse it in the next call, when a pthread key has been created. 
	MOVQ	_cgo_pthread_key_created(SB), AX
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CMPQ	AX, $0
	JEQ	dropm
	CMPQ	(AX), $0
	JNE	done

dropm:
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET

// func setg(gg *g)
// set g. for use by needm.
// Stores gg into the g slot of thread-local storage.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
// The G* argument is taken from DI. It is stored both into the TLS g slot
// and into R14.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	MOVQ	DI, R14 // set the g register
	RET

// abort raises a breakpoint trap (INT $3) and, should execution continue
// past it, spins forever.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
// Calls runtime·abort when a bound is violated.
// NOTE(review): the lower-bound test below uses JHI, i.e. it requires
// SP > stack.lo (strict), which is slightly tighter than the range above.
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	// Skip the abort when stack.hi > SP.
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	// Skip the abort when SP > stack.lo.
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Reads the time-stamp counter and returns (DX<<32)+AX.
// RDTSCP is used when internal/cpu reports it; otherwise RDTSC is issued
// after an MFENCE;LFENCE pair.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	// Combine the two 32-bit halves (high in DX, low in AX) into one result.
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores be globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP	done

// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
// Tail-calls aeshashbody when runtime·useAeshash is non-zero, otherwise
// tail-calls runtime·memhashFallback with the argument registers untouched.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)

// func strhash(p unsafe.Pointer, h uintptr) uintptr
// Like memhash, but p points at a string header: the data pointer is read
// from 0(AX) and the length from 8(AX) before jumping to aeshashbody.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)

// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// Shared body of the AES-based hashes. The length in CX selects one of
// several code paths (0, 1-15, 16, 17-32, 33-64, 65-128, 129+ bytes).
// The paths that use X15 as scratch zero it again before returning.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW	$0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on the input length.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	// CX = 2*len, so (AX)(CX*8) addresses the 16-byte masks entry for len.
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	// CX = 2*len, so (AX)(CX*8) addresses the 16-byte shifts entry for len.
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// (first 16 bytes and last 16 bytes; the two loads may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load data: first 32 bytes and last 32 bytes
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// xor with seeds
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// scramble 3 times
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data
	// (first 64 bytes and last 64 bytes)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	// CX = (len-1)/128, which is at least 1 on this path (len > 128).
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN $16
aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	// advance to the next 128-byte block
	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Hashes the 4 bytes at p: the seed and the data are packed into X0 and run
// through three AESENC rounds keyed from runtime·aeskeysched. Tail-calls
// runtime·memhash32Fallback when runtime·useAeshash is zero.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)

// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Same scheme as memhash32, but for the 8 bytes at p (inserted with PINSRQ).
// Tail-calls runtime·memhash64Fallback when runtime·useAeshash is zero.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)

// simple mask to get rid of data in the high part of the register.
// masks<> holds 16 entries of 16 bytes each. Entry i has its low i bytes
// set to 0xff and the remaining bytes zero.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// func checkASM() bool
// Reports whether both lookup tables are 16-byte aligned.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	// OR the two addresses together: the low 4 bits of the result are zero
	// only if they are zero in both addresses.
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Each entry is 16 bytes. Control bytes of 0xff have the high bit set, which
// makes pshufb write zero to that destination byte.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// return0 sets AX to zero and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	// Follow g -> m -> curg -> stack.hi; the result is left in AX.
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// This is called from .init_array and follows the platform, not Go, ABI.
// The new moduledata pointer arrives in DI. It is linked after the current
// runtime·lastmoduledatap and then becomes the new lastmoduledatap.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET

// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	// Load g from TLS into R14.
	get_tls(R14)
	MOVQ	g(R14), R14
#ifndef GOOS_plan9
	// Zero X15 (skipped on plan9).
	XORPS	X15, X15
#endif
	JMP	·sigpanic<ABIInternal>(SB)

// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcWriteBarrier2(SB)
//     MOVQ    AX, (R11)
//     MOVQ    88(CX), DX
//     MOVQ    DX, 8(R11)
// dowrite:
//     MOVQ    AX, 88(CX)
//
// Frame layout ($112): 0..88(SP) hold the general-purpose registers spilled
// on the flush path; 96(SP) and 104(SP) hold R12 and R13.
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12			// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	// R11 again holds the requested byte count; try the fast path again.
	JMP	retry

// gcWriteBarrier1 through gcWriteBarrier8 are the entry points.
// gcWriteBarrierN loads N*8 (the number of buffer bytes wanted) into R11 and
// tail-jumps to gcWriteBarrier<>.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)

// Error string reported by debugCallV2 when the requested frame is too large.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below

// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines.
// It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
//
// Summary of the R12 codes raised below via INT3 (BYTE $0xcc):
//    0  - call frame is ready; the debugger should set up and start the call
//    1  - the injected function returned
//    2  - the injected function panicked (raised by debugCallPanicked)
//    8  - the call cannot be injected; a reason string is at the top of the stack
//    16 - done; the debugger should restore registers and resume
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	// The result is read from 8(SP) and 16(SP); a zero word at 8(SP)
	// means the check passed.
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	//
	// DEBUG_CALL_DISPATCH(NAME, MAXSIZE): if the requested frame size in AX
	// is at most MAXSIZE, pass NAME to runtime·debugCallWrap and jump to
	// restore. Otherwise JA 5(PC) skips the remaining four instructions of
	// the expansion and falls through to the next, larger size class.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET

// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each generated function has a frame of MAXSIZE bytes. It raises INT3 with
// R12 = 0, then raises INT3 with R12 = 1, then returns.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, R12;			\
	BYTE	$0xcc;				\
	MOVQ	$1, R12;			\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
// Reports a panic in an injected call: places the two words of val at the
// top of the stack and raises INT3 with R12 = 2.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, R12
	BYTE	$0xcc
	RET

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// Defined as ABIInternal since they do not use the stack-based Go ABI.
//
// NOTE(review): as written, every stub below only moves its arguments
// between registers (from CX/DX into AX/BX as needed) and then jumps to the
// matching goPanic* handler; none of them stores to the stack.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)

// runtime·tls_g is defined here for android (initialised to 16) and for
// windows (zero-initialised).
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif

// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// RETPOLINE(reg) emits, as raw bytes, the body of one retpoline thunk.
// reg is the hardware register number (0 = AX, 1 = CX, ... 15 = R15,
// matching the TEXT lines that follow the macro).
//
// Byte layout, as offsets from the start of the thunk:
//   0: CALL setup          E8 + rel32 of 4: skips the 2-byte PAUSE and 2-byte JMP
//   5: nospec: PAUSE       F3 90
//   7: JMP nospec          EB + rel8 of -4: back to offset 5
//   9: setup: MOVQ reg, 0(SP); RET
//
// The MOVQ overwrites the return address pushed by the CALL with the value
// held in reg, so the RET transfers control to that address. The
// PAUSE/JMP pair at nospec is a loop that is never left by falling through.
//
// Encoding of the MOVQ: 0x48|((reg&8)>>1) is the REX prefix (REX.W, plus
// REX.R when reg is 8-15), 0x89 is the store opcode, 0x04|((reg&7)<<3) is the
// ModRM byte carrying the low three bits of reg with a SIB-addressed memory
// operand, and 0x24 is the SIB byte for 0(SP).
#define RETPOLINE(reg) \
	/*   CALL setup */      BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
	/* nospec: */ \
	/*   PAUSE */           BYTE $0xF3; BYTE $0x90; \
	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2); \
	/* setup: */ \
	/*   MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
	/*   RET */             BYTE $0xC3

// One thunk per general-purpose register. Each is NOSPLIT|NOFRAME with a
// zero-byte frame, and its whole body is the RETPOLINE byte sequence for
// that register's number.
TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
/* (so no thunk is defined for register number 4) */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)

// getfp copies the BP register into AX and returns.
// NOTE(review): with NOFRAME and a $0 frame the assembler presumably inserts
// no prologue that would change BP first, so the value returned would be the
// caller's frame pointer; confirm against the Go-side declaration of getfp.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ BP, AX
	RET