// Source: github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/asm_amd64.s (about)

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
// It loads argc into DI and the address of argv into SI, then jumps
// to runtime·rt0_go.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)

// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
// Those registers are left untouched for runtime·rt0_go.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)

// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
//
// It stashes argc/argv in file-local globals, calls runtime·libpreinit
// synchronously, and then starts _rt0_amd64_lib_go on a new thread:
// through _cgo_sys_thread_create when that pointer is non-nil,
// otherwise through runtime·newosproc0.
TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()

	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Create a new thread to finish Go runtime initialization.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JZ	nocgo

	// We're calling back to C.
	// Align stack per ELF ABI requirements.
	MOVQ	SP, BX  // Callee-save in C ABI
	ANDQ	$~15, SP
	MOVQ	$_rt0_amd64_lib_go(SB), DI	// first C argument: function to run
	MOVQ	$0, SI	// second C argument: 0
	CALL	AX
	MOVQ	BX, SP	// undo the alignment adjustment
	JMP	restore

nocgo:
	// No thread-creation hook is installed: pass (stacksize, fn)
	// to runtime·newosproc0 in two 8-byte stack slots.
	ADJSP	$16
	MOVQ	$0x800000, 0(SP)		// stacksize
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)			// fn
	CALL	runtime·newosproc0(SB)
	ADJSP	$-16

restore:
	POP_REGS_HOST_TO_ABI0()
	RET

// _rt0_amd64_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
// It reloads the argc/argv saved by _rt0_amd64_lib into DI/SI and
// jumps to runtime·rt0_go.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

// File-local storage for the argc/argv handed to _rt0_amd64_lib.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8

// bad_cpu_msg is the 84-byte message (including the trailing newline)
// that the bad_cpu path of runtime·rt0_go writes when a GOAMD64
// requirement is not met. The text depends on which GOAMD64_vN macro
// is defined.
#ifdef GOAMD64_v2
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v3
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v4
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
#endif

GLOBL bad_cpu_msg<>(SB), RODATA, $84

// Define a list of AMD64 microarchitecture level features
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
//
// The register suffix of each mask names the register that
// runtime·rt0_go tests it against:
//   *_FEATURES_CX      CX after CPUID with AX=1
//   *_EXT_FEATURES_CX  CX after CPUID with AX=0x80000001
//   *_EXT_FEATURES_BX  BX after CPUID with AX=7, CX=0
//   *_OS_SUPPORT_AX    AX after XGETBV with CX=0

                     // SSE3     SSSE3    CMPXCHG16B SSE4.1    SSE4.2    POPCNT
#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
                         // LAHF/SAHF
#define V2_EXT_FEATURES_CX (1 << 0)
                                      // FMA       MOVBE     OSXSAVE   AVX       F16C
#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
                                              // ABM (FOR LZCNT)
#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
                         // BMI1     AVX2     BMI2
#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
                       // XMM      YMM
#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)

// Level v4 requires the same CPUID leaf-1 and leaf-0x80000001 CX bits
// as v3; it only adds bits to the leaf-7 BX mask and the XGETBV mask.
#define V4_FEATURES_CX V3_FEATURES_CX

#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
                                              // AVX512F   AVX512DQ  AVX512CD  AVX512BW  AVX512VL
#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
                                          // OPMASK   ZMM
#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))

// Select the NEED_* masks for the configured GOAMD64 level. Each
// feature check in runtime·rt0_go is guarded by #ifdef on its NEED_*
// macro, so when none of these levels is defined the checks are not
// assembled at all.
#ifdef GOAMD64_v2
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V2_FEATURES_CX
#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
#endif

#ifdef GOAMD64_v3
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V3_FEATURES_CX
#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
#endif

#ifdef GOAMD64_v4
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V4_FEATURES_CX
#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX

// Darwin requires a different approach to check AVX512 support, see CL 285572.
// On Darwin only the v3 XGETBV bits are required; the AVX-512 bits are
// instead read from the commpage capability word (NEED_DARWIN_SUPPORT).
#ifdef GOOS_darwin
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
// These values are from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define hasAVX512F 0x0000004000000000
#define hasAVX512CD 0x0000008000000000
#define hasAVX512DQ 0x0000010000000000
#define hasAVX512BW 0x0000020000000000
#define hasAVX512VL 0x0000100000000000
#define NEED_DARWIN_SUPPORT (hasAVX512F | hasAVX512DQ | hasAVX512CD | hasAVX512BW | hasAVX512VL)
#else
#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
#endif

#endif

// runtime·rt0_go is the common bootstrap reached from _rt0_amd64, main
// and _rt0_amd64_lib_go, with argc in DI and argv in SI. In order, it:
//   - saves argc/argv on a 16-byte aligned stack and sets g0's stack
//     bounds and guards from the current SP,
//   - records CPUID vendor/version information,
//   - calls _cgo_init if present, otherwise sets up and tests TLS
//     (skipped on the operating systems listed below),
//   - links g0 and m0 to each other and stores g0 in TLS,
//   - checks the GOAMD64 feature requirements, jumping to bad_cpu on failure,
//   - calls check, args, osinit, schedinit, queues runtime·mainPC with
//     newproc, and enters mstart, which is not expected to return.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(5*8), SP		// 3args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 24(SP)
	MOVQ	BX, 32(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024)(SP), BX	// provisional lower bound: 64 KiB below SP
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)

notintel:
	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#endif
#ifdef GOOS_windows
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const_stackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

	// After _cgo_init every OS except Windows skips the TLS setup
	// below; Windows falls through into needtls.
#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

#ifdef GOOS_windows
	CALL	runtime·wintls(SB)
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)	// value visible in m0.tls: skip the abort
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared

	// Check GOAMD64 requirements
	// We need to do this after setting up TLS, so that
	// we can report an error if there is a failure. See issue 49586.
#ifdef NEED_FEATURES_CX
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	bad_cpu
	MOVL	$1, AX
	CPUID
	ANDL	$NEED_FEATURES_CX, CX
	CMPL	CX, $NEED_FEATURES_CX	// all required bits must be set
	JNE	bad_cpu
#endif

#ifdef NEED_MAX_CPUID
	MOVL	$0x80000000, AX
	CPUID
	CMPL	AX, $NEED_MAX_CPUID
	JL	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_BX
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	ANDL	$NEED_EXT_FEATURES_BX, BX
	CMPL	BX, $NEED_EXT_FEATURES_BX
	JNE	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_CX
	MOVL	$0x80000001, AX
	CPUID
	ANDL	$NEED_EXT_FEATURES_CX, CX
	CMPL	CX, $NEED_EXT_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_OS_SUPPORT_AX
	XORL    CX, CX
	XGETBV
	ANDL	$NEED_OS_SUPPORT_AX, AX
	CMPL	AX, $NEED_OS_SUPPORT_AX
	JNE	bad_cpu
#endif

#ifdef NEED_DARWIN_SUPPORT
	MOVQ	$commpage64_version, BX
	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
	JL	bad_cpu
	MOVQ	$commpage64_cpu_capabilities64, BX
	MOVQ	(BX), BX
	MOVQ	$NEED_DARWIN_SUPPORT, CX
	ANDQ	CX, BX
	CMPQ	BX, CX
	JNE	bad_cpu
#endif

	CALL	runtime·check(SB)

	MOVL	24(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	32(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	CALL	runtime·newproc(SB)
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

bad_cpu: // show that the program requires a certain microarchitecture level.
	// write(2, bad_cpu_msg, 84), then exit(1).
	MOVQ	$2, 0(SP)
	MOVQ	$bad_cpu_msg<>(SB), AX
	MOVQ	AX, 8(SP)
	MOVQ	$84, 16(SP)
	CALL	runtime·write(SB)
	MOVQ	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET

	// Prevent dead-code elimination of debugCallV2, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET

// mainPC is a function value for runtime.main, to be passed to newproc.
// The reference to runtime.main is made via ABIInternal, since the
// actual function (not the ABI0 wrapper) is needed by newproc.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint emits the single byte 0xcc (the one-byte x86 INT3
// encoding) followed by RET.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

// mstart is a thin TOPFRAME wrapper that calls runtime·mstart0.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
	CALL	runtime·mstart0(SB)
	RET // not reached

/*
 *  go-routine
 */

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
// Loads buf into BX and buf.g into DX, faults early if g is nil, and
// continues in gogo<>.
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	JMP	gogo<>(SB)

// gogo<> expects BX = gobuf and DX = g. It installs g in TLS and R14,
// restores SP, the return value (AX), ctxt (DX) and BP from the gobuf,
// zeroes those gobuf fields, and jumps to gobuf.pc.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX	// BX is no longer needed as the gobuf pointer
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
// fn arrives in AX (ABIInternal); the current g is read from R14.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, DX	// DX = fn

	// Save state in g->sched. The caller's SP and PC are restored by gogo to
	// resume execution in the caller's frame (implicit return). The caller's BP
	// is also restored to support frame pointer unwinding.
	MOVQ	SP, BX	// hide (SP) reads from vet
	MOVQ	8(BX), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
	// can cause a frame pointer cycle, see CL 476235.
	MOVQ	(BP), BX // caller's BP
	MOVQ	BX, (g_sched+gobuf_bp)(R14)

	// switch to m->g0 & its stack, call fn
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14	// if g == m->g0 call badmcall
	JNE	goodm
	JMP	runtime·badmcall(SB)
goodm:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12	// R12 = code pointer stored at the start of the func value
	CALL	R12		// fn(g)
	POPQ	AX
	JMP	runtime·badmcall2(SB)	// reached only if fn returned
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
// The frame layout needs to match systemstack
// so that it can pretend to be systemstack_switch.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	// Make sure this function is not leaf,
	// so the frame is saved.
	CALL	runtime·abort(SB)
	RET

// func systemstack(fn func())
// If the current g is m.gsignal or m.g0, fn is tail-called on the
// current stack. If it is m.curg, the g's state is saved, execution
// switches to g0's stack to call fn, and then switches back.
// Any other g is reported through runtime·badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// Switch stacks.
	// The original frame pointer is stored in BP,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVQ	DI, DX	// DX = fn (the func value)
	MOVQ	0(DI), DI	// DI = code pointer stored at the start of the func value
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	(g_sched+gobuf_bp)(AX), BP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	MOVQ	$0, (g_sched+gobuf_bp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	// The function epilogue is not called on a tail call.
	// Pop BP from the stack to simulate it.
	POPQ	BP
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3


/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
// morestack records the caller chain in m->morebuf and g->sched, then
// switches to m->g0's stack and calls runtime·newstack. It aborts if
// the current g is m->g0 or m->gsignal.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX	// BX = m from here on
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)	// g != m->g0: skip the two CALLs below
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)	// g != m->gsignal: skip the two CALLs below
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX holds ctxt on entry (zeroed by morestack_noctxt)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	MOVQ	(g_sched+gobuf_bp)(BX), BP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
// Zeroes DX, which morestack stores as g->sched.ctxt, then jumps to it.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// Layout written through R12: nine 8-byte integer slots at offsets
// 0-64 (AX, BX, CX, DI, SI, R8, R9, R10, R11) followed by fifteen
// 8-byte slots at offsets 72-184 holding the low 64 bits of X0-X14.
// unspillArgs reads the same layout.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	MOVQ AX, 0(R12)
	MOVQ BX, 8(R12)
	MOVQ CX, 16(R12)
	MOVQ DI, 24(R12)
	MOVQ SI, 32(R12)
	MOVQ R8, 40(R12)
	MOVQ R9, 48(R12)
	MOVQ R10, 56(R12)
	MOVQ R11, 64(R12)
	MOVQ X0, 72(R12)
	MOVQ X1, 80(R12)
	MOVQ X2, 88(R12)
	MOVQ X3, 96(R12)
	MOVQ X4, 104(R12)
	MOVQ X5, 112(R12)
	MOVQ X6, 120(R12)
	MOVQ X7, 128(R12)
	MOVQ X8, 136(R12)
	MOVQ X9, 144(R12)
	MOVQ X10, 152(R12)
	MOVQ X11, 160(R12)
	MOVQ X12, 168(R12)
	MOVQ X13, 176(R12)
	MOVQ X14, 184(R12)
	RET

// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// It is the inverse of spillArgs and uses the same offsets.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	MOVQ 0(R12), AX
	MOVQ 8(R12), BX
	MOVQ 16(R12), CX
	MOVQ 24(R12), DI
	MOVQ 32(R12), SI
	MOVQ 40(R12), R8
	MOVQ 48(R12), R9
	MOVQ 56(R12), R10
	MOVQ 64(R12), R11
	MOVQ 72(R12), X0
	MOVQ 80(R12), X1
	MOVQ 88(R12), X2
	MOVQ 96(R12), X3
	MOVQ 104(R12), X4
	MOVQ 112(R12), X5
	MOVQ 120(R12), X6
	MOVQ 128(R12), X7
	MOVQ 136(R12), X8
	MOVQ 144(R12), X9
	MOVQ 152(R12), X10
	MOVQ 160(R12), X11
	MOVQ 168(R12), X12
	MOVQ 176(R12), X13
	MOVQ 184(R12), X14
	RET

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the frame size in CX is <= MAXSIZE
// (unsigned compare); otherwise JA 3(PC) skips the MOVQ/JMP pair and
// execution falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflectcall loads frameSize into CX and dispatches to the smallest
// runtime·callN whose N is >= frameSize (N = 16 ... 1073741824,
// doubling each step). Larger sizes jump to runtime·badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX frameSize+32(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call function NAME with a MAXSIZE-byte
// frame. It copies stackArgsSize bytes of stack arguments into the
// frame, loads argument registers from regArgs via unspillArgs, calls
// f, saves the result registers back via spillArgs, and hands the
// stack results (from stackRetOffset onward) to callRet<>.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	stackArgs+16(FP), SI;		\
	MOVLQZX stackArgsSize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* set up argument registers */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·unspillArgs(SB);		\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	MOVQ	(DX), R12;			\
	CALL	R12;				\
	/* copy register return values back */		\
	MOVQ    regArgs+40(FP), R12;		\
	CALL    ·spillArgs(SB);			\
	MOVLQZX	stackArgsSize+24(FP), CX;		\
	MOVLQZX	stackRetOffset+28(FP), BX;		\
	MOVQ	stackArgs+16(FP), DI;		\
	MOVQ	stackArgsType+0(FP), DX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
// Register inputs, in the order they are stored as reflectcallmove's
// five stack arguments: DX, DI, SI, CX, R12.
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)
	MOVQ	DI, 8(SP)
	MOVQ	SI, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	R12, 32(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// Instantiate one call function per frame size used by reflectcall.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE once per iteration, counting the 32-bit
// cycles argument down to zero.
// NOTE(review): the count is decremented before it is tested, so
// cycles == 0 would wrap and spin about 2^32 times. Confirm callers
// never pass 0.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET


TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with frame pointer
// and without locals ($0) or else unwinding from
// systemstack_switch is incorrect.
// Smashes R9.
795 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0 796 // Take systemstack_switch PC and add 8 bytes to skip 797 // the prologue. The final location does not matter 798 // as long as we are between the prologue and the epilogue. 799 MOVQ $runtime·systemstack_switch+8(SB), R9 800 MOVQ R9, (g_sched+gobuf_pc)(R14) 801 LEAQ 8(SP), R9 802 MOVQ R9, (g_sched+gobuf_sp)(R14) 803 MOVQ $0, (g_sched+gobuf_ret)(R14) 804 MOVQ BP, (g_sched+gobuf_bp)(R14) 805 // Assert ctxt is zero. See func save. 806 MOVQ (g_sched+gobuf_ctxt)(R14), R9 807 TESTQ R9, R9 808 JZ 2(PC) 809 CALL runtime·abort(SB) 810 RET 811 812 // func asmcgocall_no_g(fn, arg unsafe.Pointer) 813 // Call fn(arg) aligned appropriately for the gcc ABI. 814 // Called on a system stack, and there may be no g yet (during needm). 815 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16 816 MOVQ fn+0(FP), AX 817 MOVQ arg+8(FP), BX 818 MOVQ SP, DX 819 ANDQ $~15, SP // alignment 820 MOVQ DX, 8(SP) 821 MOVQ BX, DI // DI = first argument in AMD64 ABI 822 MOVQ BX, CX // CX = first argument in Win64 823 CALL AX 824 MOVQ 8(SP), DX 825 MOVQ DX, SP 826 RET 827 828 // func asmcgocall(fn, arg unsafe.Pointer) int32 829 // Call fn(arg) on the scheduler stack, 830 // aligned appropriately for the gcc ABI. 831 // See cgocall.go for more details. 832 TEXT ·asmcgocall(SB),NOSPLIT,$0-20 833 MOVQ fn+0(FP), AX 834 MOVQ arg+8(FP), BX 835 836 MOVQ SP, DX 837 838 // Figure out if we need to switch to m->g0 stack. 839 // We get called to create new OS threads too, and those 840 // come in on the m->g0 stack already. Or we might already 841 // be on the m->gsignal stack. 842 get_tls(CX) 843 MOVQ g(CX), DI 844 CMPQ DI, $0 845 JEQ nosave 846 MOVQ g_m(DI), R8 847 MOVQ m_gsignal(R8), SI 848 CMPQ DI, SI 849 JEQ nosave 850 MOVQ m_g0(R8), SI 851 CMPQ DI, SI 852 JEQ nosave 853 854 // Switch to system stack. 855 // The original frame pointer is stored in BP, 856 // which is useful for stack unwinding. 
857 CALL gosave_systemstack_switch<>(SB) 858 MOVQ SI, g(CX) 859 MOVQ (g_sched+gobuf_sp)(SI), SP 860 861 // Now on a scheduling stack (a pthread-created stack). 862 // Make sure we have enough room for 4 stack-backed fast-call 863 // registers as per windows amd64 calling convention. 864 SUBQ $64, SP 865 ANDQ $~15, SP // alignment for gcc ABI 866 MOVQ DI, 48(SP) // save g 867 MOVQ (g_stack+stack_hi)(DI), DI 868 SUBQ DX, DI 869 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback) 870 MOVQ BX, DI // DI = first argument in AMD64 ABI 871 MOVQ BX, CX // CX = first argument in Win64 872 CALL AX 873 874 // Restore registers, g, stack pointer. 875 get_tls(CX) 876 MOVQ 48(SP), DI 877 MOVQ (g_stack+stack_hi)(DI), SI 878 SUBQ 40(SP), SI 879 MOVQ DI, g(CX) 880 MOVQ SI, SP 881 882 MOVL AX, ret+16(FP) 883 RET 884 885 nosave: 886 // Running on a system stack, perhaps even without a g. 887 // Having no g can happen during thread creation or thread teardown 888 // (see needm/dropm on Solaris, for example). 889 // This code is like the above sequence but without saving/restoring g 890 // and without worrying about the stack moving out from under us 891 // (because we're on a system stack, not a goroutine stack). 892 // The above code could be used directly if already on a system stack, 893 // but then the only path through this code would be a rare case on Solaris. 894 // Using this code for all "already on system stack" calls exercises it more, 895 // which should help keep it correct. 
896 SUBQ $64, SP 897 ANDQ $~15, SP 898 MOVQ $0, 48(SP) // where above code stores g, in case someone looks during debugging 899 MOVQ DX, 40(SP) // save original stack pointer 900 MOVQ BX, DI // DI = first argument in AMD64 ABI 901 MOVQ BX, CX // CX = first argument in Win64 902 CALL AX 903 MOVQ 40(SP), SI // restore original stack pointer 904 MOVQ SI, SP 905 MOVL AX, ret+16(FP) 906 RET 907 908 #ifdef GOOS_windows 909 // Dummy TLS that's used on Windows so that we don't crash trying 910 // to restore the G register in needm. needm and its callees are 911 // very careful never to actually use the G, the TLS just can't be 912 // unset since we're in Go code. 913 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize 914 #endif 915 916 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) 917 // See cgocall.go for more details. 918 TEXT ·cgocallback(SB),NOSPLIT,$24-24 919 NO_LOCAL_POINTERS 920 921 // If g is nil, Go did not create the current thread. 922 // Call needm to obtain one m for temporary use. 923 // In this case, we're running on the thread stack, so there's 924 // lots of space, but the linker doesn't know. Hide the call from 925 // the linker analysis by using an indirect call through AX. 926 get_tls(CX) 927 #ifdef GOOS_windows 928 MOVL $0, BX 929 CMPQ CX, $0 930 JEQ 2(PC) 931 #endif 932 MOVQ g(CX), BX 933 CMPQ BX, $0 934 JEQ needm 935 MOVQ g_m(BX), BX 936 MOVQ BX, savedm-8(SP) // saved copy of oldm 937 JMP havem 938 needm: 939 #ifdef GOOS_windows 940 // Set up a dummy TLS value. needm is careful not to use it, 941 // but it needs to be there to prevent autogenerated code from 942 // crashing when it loads from it. 943 // We don't need to clear it or anything later because needm 944 // will set up TLS properly. 
945 MOVQ $zeroTLS<>(SB), DI 946 CALL runtime·settls(SB) 947 #endif 948 // On some platforms (Windows) we cannot call needm through 949 // an ABI wrapper because there's no TLS set up, and the ABI 950 // wrapper will try to restore the G register (R14) from TLS. 951 // Clear X15 because Go expects it and we're not calling 952 // through a wrapper, but otherwise avoid setting the G 953 // register in the wrapper and call needm directly. It 954 // takes no arguments and doesn't return any values so 955 // there's no need to handle that. Clear R14 so that there's 956 // a bad value in there, in case needm tries to use it. 957 XORPS X15, X15 958 XORQ R14, R14 959 MOVQ $runtime·needm<ABIInternal>(SB), AX 960 CALL AX 961 MOVQ $0, savedm-8(SP) // dropm on return 962 get_tls(CX) 963 MOVQ g(CX), BX 964 MOVQ g_m(BX), BX 965 966 // Set m->sched.sp = SP, so that if a panic happens 967 // during the function we are about to execute, it will 968 // have a valid SP to run on the g0 stack. 969 // The next few lines (after the havem label) 970 // will save this SP onto the stack and then write 971 // the same SP back to m->sched.sp. That seems redundant, 972 // but if an unrecovered panic happens, unwindm will 973 // restore the g->sched.sp from the stack location 974 // and then systemstack will try to use it. If we don't set it here, 975 // that restored SP will be uninitialized (typically 0) and 976 // will not be usable. 977 MOVQ m_g0(BX), SI 978 MOVQ SP, (g_sched+gobuf_sp)(SI) 979 980 havem: 981 // Now there's a valid m, and we're running on its m->g0. 982 // Save current m->g0->sched.sp on stack and then set it to SP. 983 // Save current sp in m->g0->sched.sp in preparation for 984 // switch back to m->curg stack. 985 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP). 986 MOVQ m_g0(BX), SI 987 MOVQ (g_sched+gobuf_sp)(SI), AX 988 MOVQ AX, 0(SP) 989 MOVQ SP, (g_sched+gobuf_sp)(SI) 990 991 // Switch to m->curg stack and call runtime.cgocallbackg. 
992 // Because we are taking over the execution of m->curg 993 // but *not* resuming what had been running, we need to 994 // save that information (m->curg->sched) so we can restore it. 995 // We can restore m->curg->sched.sp easily, because calling 996 // runtime.cgocallbackg leaves SP unchanged upon return. 997 // To save m->curg->sched.pc, we push it onto the curg stack and 998 // open a frame the same size as cgocallback's g0 frame. 999 // Once we switch to the curg stack, the pushed PC will appear 1000 // to be the return PC of cgocallback, so that the traceback 1001 // will seamlessly trace back into the earlier calls. 1002 MOVQ m_curg(BX), SI 1003 MOVQ SI, g(CX) 1004 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI 1005 MOVQ (g_sched+gobuf_pc)(SI), BX 1006 MOVQ BX, -8(DI) // "push" return PC on the g stack 1007 // Gather our arguments into registers. 1008 MOVQ fn+0(FP), BX 1009 MOVQ frame+8(FP), CX 1010 MOVQ ctxt+16(FP), DX 1011 // Compute the size of the frame, including return PC and, if 1012 // GOEXPERIMENT=framepointer, the saved base pointer 1013 LEAQ fn+0(FP), AX 1014 SUBQ SP, AX // AX is our actual frame size 1015 SUBQ AX, DI // Allocate the same frame size on the g stack 1016 MOVQ DI, SP 1017 1018 MOVQ BX, 0(SP) 1019 MOVQ CX, 8(SP) 1020 MOVQ DX, 16(SP) 1021 MOVQ $runtime·cgocallbackg(SB), AX 1022 CALL AX // indirect call to bypass nosplit check. We're on a different stack now. 1023 1024 // Compute the size of the frame again. FP and SP have 1025 // completely different values here than they did above, 1026 // but only their difference matters. 1027 LEAQ fn+0(FP), AX 1028 SUBQ SP, AX 1029 1030 // Restore g->sched (== m->curg->sched) from saved values. 1031 get_tls(CX) 1032 MOVQ g(CX), SI 1033 MOVQ SP, DI 1034 ADDQ AX, DI 1035 MOVQ -8(DI), BX 1036 MOVQ BX, (g_sched+gobuf_pc)(SI) 1037 MOVQ DI, (g_sched+gobuf_sp)(SI) 1038 1039 // Switch back to m->g0's stack and restore m->g0->sched.sp. 
// (Unlike m->curg, the g0 goroutine never uses sched.pc,
// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET

// func setg(gg *g)
// set g. for use by needm.
// Stores gg into the TLS g slot only; unlike setg_gcc it does not
// touch R14.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
// The G pointer arrives in DI and is written to both the TLS g slot
// and R14.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	MOVQ	DI, R14 // set the g register
	RET

// abort raises a breakpoint trap (INT $3). If execution continues past
// the trap, it spins forever so that the function never returns.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
// Calls runtime·abort if either bound is violated.
// NOTE(review): both comparisons below are strict (JHI), so SP == stack.lo
// also aborts, which is slightly tighter than the half-open range above.
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)			// stack.hi > SP: skip the abort
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)			// SP > stack.lo: skip the abort
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Uses RDTSCP when internal/cpu reports it, otherwise falls back to
// MFENCE; LFENCE; RDTSC.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	// Combine the two halves of the counter: result = DX<<32 + AX.
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores
	// globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP done

// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
// Tail-jumps to aeshashbody when runtime·useAeshash is non-zero,
// otherwise to the memhashFallback implementation.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)

// func strhash(p unsafe.Pointer, h uintptr) uintptr
// Unpacks the string header into (data, length) and tail-jumps to
// aeshashbody, or to strhashFallback when AES hashing is disabled.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)

// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// Dispatches on length to one of several code paths (0, 1-15, 16,
// 17-32, 33-64, 65-128, 129+). The paths that use X15 as scratch
// clear it again before returning.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	// Double CX so that (CX*8) below steps through the table in
	// 16-byte entries, one entry per input length.
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	// Same 16-byte-entry indexing as above, into the shifts table.
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// (the two 16-byte loads overlap when the length is below 32)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load data: first 32 bytes and last 32 bytes
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// xor with seed
	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	// scramble 3 times
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	// CX = (len-1)/128; at least 1 here because len >= 129.
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	// Falls back to memhash32Fallback when AES hashing is disabled.
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data
	// Three AES rounds, each keyed with a different 16-byte slice
	// of the aeskeysched table.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)

// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	// Falls back to memhash64Fallback when AES hashing is disabled.
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data
	// Same three keyed AES rounds as memhash32.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)

// simple mask to get rid of data in the high part of the register.
// masks<> holds 16 entries of 16 bytes each. Entry i (at offset i*16)
// has its low i bytes set to 0xff and the remaining bytes zero.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// func checkASM() bool
// Reports whether both lookup tables are 16-byte aligned: the two
// addresses are OR-ed together and the low four bits must all be zero.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET

// these are arguments to pshufb.
// They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Each entry is 16 bytes. A 0xff selector byte has its high bit set,
// which makes PSHUFB write zero to that destination byte.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// return0 sets AX to zero and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is returned in AX; only CX and AX are written.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP byte provides that one-byte offset before the CALL.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the new moduledata pointer. It is linked after the current
// last entry and then recorded as the new runtime·lastmoduledatap.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET

// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	// Reload the g register (R14) from TLS.
	get_tls(R14)
	MOVQ	g(R14), R14
#ifndef GOOS_plan9
	// Zero X15 (skipped on plan9).
	XORPS	X15, X15
#endif
	JMP	·sigpanic<ABIInternal>(SB)

// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcWriteBarrier2(SB)
//     MOVQ    AX, (R11)
//     MOVQ    88(CX), DX
//     MOVQ    DX, 8(R11)
// dowrite:
//     MOVQ    AX, 88(CX)
//
// Frame layout (112 bytes): 0..95(SP) hold the general-purpose registers
// spilled on the flush path, 96(SP) holds R12 and 104(SP) holds R13.
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12			// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	// R11 again holds the requested byte count; try the fast path again.
	JMP	retry

// gcWriteBarrier1 through gcWriteBarrier8 are the entry points. Stub N
// loads the requested byte count (8*N) into R11 and tail-jumps to
// gcWriteBarrier<>.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)

// Error text reported by debugCallV2 when the requested argument frame
// is larger than the largest debugCall<N> function.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below

// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines.
// It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
//
// Summary of the R12 codes used in this file:
//    0  call frame is ready for the debugger to fill in (debugCall<N>)
//    1  the injected function returned (debugCall<N>)
//    2  the injected function panicked (debugCallPanicked)
//    8  the call cannot be injected; reason string is at the top of the stack
//   16  done; the debugger should restore registers and resume
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	// debugCallCheck takes the caller's PC at 0(SP) and leaves a
	// string result at 8(SP) (pointer) and 16(SP) (length); a nil
	// pointer means the check passed.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	//
	// DEBUG_CALL_DISPATCH expects the frame size in AX. If it fits in
	// MAXSIZE, the macro calls debugCallWrap with NAME's address and
	// jumps to restore; otherwise JA 5(PC) skips the four instructions
	// that follow it and falls through to the next size class.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET

// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each generated function has a MAXSIZE-byte frame. It traps with
// R12 = 0, then traps again with R12 = 1, and then returns.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, R12;			\
	BYTE	$0xcc;				\
	MOVQ	$1, R12;			\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
// Reports a panic in an injected call: the two words of val are copied
// to 0(SP) and 8(SP), then the function traps with R12 = 2.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, R12
	BYTE	$0xcc
	RET

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// Defined as ABIInternal since they do not use the stack-based Go ABI.
//
// Each stub below moves its operands from the registers they arrive in
// (a subset of AX, CX, DX) into AX and/or BX, then jumps to the matching
// goPanic* handler. A stub with only one MOVQ leaves the other of AX/BX
// as it was on entry.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)

// runtime·tls_g is an 8-byte, pointer-free global defined only on
// android and windows. On android it is initialized to 16; on windows
// it is declared with no DATA initializer.
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif

// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
// RETPOLINE(reg) emits the raw bytes of one thunk for register number reg.
// The CALL pushes the address of the PAUSE/JMP loop as a return address.
// The code at "setup" then overwrites that stack slot with the target
// register and executes RET. (reg&8)>>1 supplies the prefix bit needed for
// R8-R15, and (reg&7)<<3 places the low three bits of the register number
// in the instruction's register field.
#define RETPOLINE(reg) \
	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
	/* nospec: */									\
	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
	/* setup: */									\
	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
	/*   RET */             BYTE $0xC3

// One thunk per register, passing that register's number to RETPOLINE.
// Number 4 (SP) is skipped.
TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)

// getcallerfp copies the current value of BP into AX and returns.
// NOTE(review): presumably BP still holds the caller's frame pointer here
// because the function is NOFRAME with a zero-size frame; confirm against
// the Go declaration.
TEXT ·getcallerfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ BP, AX
	RET