github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/runtime/asm_amd64.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)

// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)

// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
	// Align stack per ELF ABI requirements.
	MOVQ	SP, AX
	ANDQ	$~15, SP
	// Save C ABI callee-saved registers, as caller may need them.
	MOVQ	BX, 0x10(SP)
	MOVQ	BP, 0x18(SP)
	MOVQ	R12, 0x20(SP)
	MOVQ	R13, 0x28(SP)
	MOVQ	R14, 0x30(SP)
	MOVQ	R15, 0x38(SP)
	MOVQ	AX, 0x40(SP)

	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Create a new thread to finish Go runtime initialization.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JZ	nocgo
	MOVQ	$_rt0_amd64_lib_go(SB), DI
	MOVQ	$0, SI
	CALL	AX
	JMP	restore

nocgo:
	MOVQ	$0x800000, 0(SP)		// stacksize
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)			// fn
	CALL	runtime·newosproc0(SB)

restore:
	MOVQ	0x10(SP), BX
	MOVQ	0x18(SP), BP
	MOVQ	0x20(SP), R12
	MOVQ	0x28(SP), R13
	MOVQ	0x30(SP), R14
	MOVQ	0x38(SP), R15
	MOVQ	0x40(SP), SP
	RET

// _rt0_amd64_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8

TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP	// 2args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// g0 already in DI
	MOVQ	DI, CX	// Win64 uses CX for first parameter
	MOVQ	$setg_gcc<>(SB), SI
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

	// Prevent dead-code elimination of debugCallV1, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV1(SB), AX
	RET

DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */
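
// For orientation (a hedged sketch, not part of this file): the Gobuf
// fields that gosave and gogo manipulate below via the gobuf_* offsets
// correspond roughly to the Go-side declaration in runtime2.go:
//
//	type gobuf struct {
//		sp   uintptr        // stack pointer
//		pc   uintptr        // program counter
//		g    guintptr       // goroutine the buffer belongs to
//		ctxt unsafe.Pointer // closure context
//		ret  sys.Uintreg    // return value slot
//		lr   uintptr        // link register (unused on amd64)
//		bp   uintptr        // frame base pointer
//	}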

// func gosave(buf *gobuf)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), AX		// gobuf
	LEAQ	buf+0(FP), BX		// caller's SP
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC
	MOVQ	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	MOVQ	BP, gobuf_bp(AX)
	// Assert ctxt is zero. See func save.
	MOVQ	gobuf_ctxt(AX), BX
	TESTQ	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI
	POPQ	AX
	MOVQ	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3


/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX	// f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	MOVQ	SI, (g_sched+gobuf_g)(SI)
	LEAQ	8(SP), AX	// f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
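
// The DISPATCH chain used by ·reflectcall below amounts to the
// following Go-level sketch (hedged; jump and callN are illustrative
// names, not runtime symbols — the real code unrolls the loop so each
// call* variant gets a compile-time-constant frame size):
//
//	func dispatch(argsize uint32) {
//		for size := uint32(32); size <= 1<<30; size <<= 1 {
//			if argsize <= size {
//				jump(callN[size]) // tail call runtime·call<size>
//			}
//		}
//		jump(badreflectcall) // argument frame too large
//	}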

TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

TEXT ·reflectcall(SB), NOSPLIT, $0-32
	MOVLQZX argsize+24(FP), CX
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	argptr+16(FP), SI;		\
	MOVLQZX argsize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	(DX);				\
	/* copy return values back */		\
	MOVQ	argtype+0(FP), DX;		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX	retoffset+28(FP), BX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)
	MOVQ	DI, 8(SP)
	MOVQ	SI, 16(SP)
	MOVQ	CX, 24(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET


TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// func jmpdefer(fv *funcval, argp uintptr)
// argp is a caller SP.
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the caller's return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
	SUBQ	$5, (SP)	// return to CALL again
	MOVQ	0(DX), BX
	JMP	BX	// but first run the deferred function
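
// Why 5 bytes (a hedged note, not from the original source): deferreturn
// is reached by a near CALL, which on amd64 encodes as opcode E8 plus a
// rel32 offset, 5 bytes in total. Rewinding the saved return address by
// 5 therefore points it back at the CALL instruction itself, so when the
// deferred function returns, deferreturn runs again and handles the next
// pending defer, if any. In pointer terms:
//
//	retPC := *(*uintptr)(unsafe.Pointer(sp))    // address just after the CALL
//	*(*uintptr)(unsafe.Pointer(sp)) = retPC - 5 // address of the CALL itself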

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ	g(R8), R8
	MOVQ	0(SP), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R8)
	LEAQ	8(SP), R9
	MOVQ	R9, (g_sched+gobuf_sp)(R8)
	MOVQ	$0, (g_sched+gobuf_ret)(R8)
	MOVQ	BP, (g_sched+gobuf_bp)(R8)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	CMPQ	R8, $0
	JEQ	nosave
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave

	// Switch to system stack.
	MOVQ	m_g0(R8), SI
	CALL	gosave<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	SUBQ	$64, SP
	ANDQ	$~15, SP
	MOVQ	$0, 48(SP)	// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 40(SP)	// save original stack pointer
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	40(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET
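
// asmcgocall above, as a hedged Go-level sketch (callC, switchTo, and
// switchBack are illustrative names, not runtime symbols):
//
//	func asmcgocall(fn, arg unsafe.Pointer) int32 {
//		g := getg()
//		if g == nil || g == g.m.g0 || g == g.m.gsignal {
//			return callC(fn, arg) // already on a system stack
//		}
//		gosave(&g.sched)          // so callbacks can switch back to g
//		switchTo(g.m.g0.sched.sp) // run the C code on g0's stack
//		depth := g.stack.hi - sp  // saved instead of sp itself, since
//		r := callC(fn, arg)       // the stack may be copied meanwhile
//		switchBack(g, g.stack.hi-depth)
//		return r
//	}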

// func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	LEAQ	fn+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	ctxt+24(FP), AX
	MOVQ	AX, 24(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, R8	// holds oldm until end of function
	JMP	havem
needm:
	MOVQ	$0, 0(SP)
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	MOVQ	0(SP), R8
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 8(SP) holds the saved R8.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	MOVQ	ctxt+24(FP), BX
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX
	SUBQ	AX, DI
	MOVQ	DI, SP

	MOVQ	R8, 8(SP)
	MOVQ	BX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVQ	8(SP), R8

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ	R8, $0
	JNE	3(PC)
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// func setg(gg *g)
// set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
#ifdef GOOS_windows
	CMPQ	BX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)
	RET
settls:
	MOVQ	g_m(BX), AX
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET
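
// Note on the Windows branch above (hedged; based on the documented
// NT_TIB layout, not on a comment in this file): 0x28(GS) on
// windows/amd64 is the ArbitraryUserPointer field of the thread's TIB,
// which the runtime repurposes as its TLS slot. In effect:
//
//	teb.ArbitraryUserPointer = &gg.m.tls[0] // or nil, to clear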

// void setg_gcc(G*); set g called from gcc.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	RET

TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-32
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	s+16(FP), CX	// size
	LEAQ	ret+24(FP), DX
	JMP	runtime·aeshashbody(SB)

// func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to string struct
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	LEAQ	ret+16(FP), DX
	JMP	runtime·aeshashbody(SB)
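
// How aeshashbody handles inputs of 1..15 bytes (a hedged sketch of
// the aes0to15/endofpage paths below; load16, pand, and pshufb stand
// in for the MOVOU, PAND, and PSHUFB instructions):
//
//	if the 16-byte load at p cannot cross a page boundary {
//		x := load16(p)             // may read past the data, same page
//		x = pand(x, masks[len])    // keep only the low len bytes
//	} else {                           // data ends near a page boundary
//		x := load16(p + len - 16)  // 16 bytes ending at the last byte
//		x = pshufb(x, shifts[len]) // move those len bytes to the bottom
//	}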

// AX: data
// CX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW	$0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, (DX)
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET

// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET

// simple mask to get rid of data in the high part of the register.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// func checkASM() bool
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET
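
// Worked example for the masks table above and the shifts table below
// (hedged, derived from the DATA values): each table entry is 16 bytes
// at offset 16*len. For len = 3, masks[3] is ff ff ff 00 ... 00, so
// PAND keeps only the low 3 bytes. shifts[3] is 0d 0e 0f ff ... ff:
// PSHUFB selects source bytes 13, 14, 15 into lanes 0..2 and zeroes
// the rest (a control byte with its high bit set clears the
// destination byte), moving the 3 data bytes from the top of the
// register to the bottom.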

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// This is called from .init_array and follows the platform, not Go, ABI.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15	// The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET
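
// What gcWriteBarrier below does, as a hedged Go-level sketch: the fast
// path appends a (value, old slot contents) pair to the current P's
// write-barrier buffer and flushes when full. Field names follow
// mwbbuf.go; this is an illustration, not the actual implementation:
//
//	func gcWriteBarrierSketch(slot *uintptr, val uintptr) {
//		buf := &getg().m.p.ptr().wbBuf
//		p := buf.next
//		buf.next += 2 * sys.PtrSize
//		*(*uintptr)(unsafe.Pointer(p)) = val
//		*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = *slot
//		if buf.next == buf.end {
//			wbBufFlush(slot, val) // slow path: drain the buffer
//		}
//		*slot = val // finally, perform the write itself
//	}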

// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
	// Record the write.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret

DATA	debugCallFrameTooLarge<>+0x00(SB)/8, $"call fra"
DATA	debugCallFrameTooLarge<>+0x08(SB)/8, $"me too l"
DATA	debugCallFrameTooLarge<>+0x10(SB)/4, $"arge"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $0x14	// Size duplicated below
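
// Summary of the INT3 protocol implemented by debugCallV1 (a hedged
// digest of the comments that follow; AX selects the message to the
// debugger):
//
//	AX = 0   call frame ready: write args at SP, push trap PC, set PC to target
//	AX = 1   the call returned: results are on the stack at SP
//	AX = 2   the call panicked: the panic value (an interface{}) is at SP
//	AX = 8   the call was rejected: (string, length) explaining why is at SP
//	AX = 16  done: restore all registers except RIP and RSP and resume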

// debugCallV1 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV1 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting RAX and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers in GC register
	// map order (see ssa.registersAMD64). This makes it possible
	// to copy the stack while updating pointers currently held in
	// registers, and for the GC to find roots in registers.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$0x14, 8(SP)
	MOVQ	$8, AX
	BYTE	$0xcc
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET

#define DEBUG_CALL_FN(NAME,MAXSIZE)	\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;	\
	NO_LOCAL_POINTERS;		\
	MOVQ	$0, AX;			\
	BYTE	$0xcc;			\
	MOVQ	$1, AX;			\
	BYTE	$0xcc;			\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc
	RET