// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Listing origin:
// github.com/comwrg/go/src@v0.0.0-20220319063731-c238d0440370/runtime/asm_386.s

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)

// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
//
// BP, BX, SI and DI are pushed on entry and popped again at restore.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash argc/argv for _rt0_386_lib_go, which runs on another thread.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Two argument slots, used by either thread-creation path below.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)
	MOVL	$0, 4(SP)

	CALL	AX

	MOVL	BP, SP	// undo the alignment

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET

// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
// It reloads the argc/argv saved by _rt0_386_lib and jumps to rt0_go.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)
	JMP	runtime·rt0_go(SB)

// argc/argv handed from _rt0_386_lib to _rt0_386_lib_go.
DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4

// rt0_go is the common bootstrap reached by JMP from the _rt0_* entry
// points, with argc at 0(SP) and argv at 4(SP). In order it:
//   - moves argc/argv onto a 16-byte aligned scratch area,
//   - gives g0 default stack bounds,
//   - checks for CPUID and MMX (printing bad_proc_msg and exiting otherwise),
//   - sets up TLS through _cgo_init when present, else through ldt0setup,
//   - links m0 and g0 and installs g0 as the current g,
//   - calls check, args, osinit and schedinit,
//   - queues runtime·main via newproc and enters mstart.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	// BP = &g0 stays live until it is passed as arg 1 to _cgo_init.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)	// 61, the size of bad_proc_msg
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
#ifdef GOOS_android
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+8).
	MOVL	-8(TLS), BX
	MOVL	BX, 12(SP)
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	$0, BX
	MOVL	BX, 12(SP)	// arg 3,4: not used when using platform's TLS
	MOVL	BX, 8(SP)
#endif
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// arg 2: setg_gcc
	MOVL	BP, 0(SP)	// arg 1: g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_openbsd
	// skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
	JMP	ok
#endif
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	ldt0setup<>(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)
	RET

// Message written to fd 2 by rt0_go's bad_proc path; its 61 bytes match
// the 0x3d length passed to runtime·write there.
DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
GLOBL	bad_proc_msg<>(SB), RODATA, $61

// mainPC holds the address of runtime·main; rt0_go pushes &mainPC as
// the entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a debug trap (INT $3) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

// mstart is the TOPFRAME wrapper around mstart0.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	CALL	runtime·mstart0(SB)
	RET // not reached

/*
 *  go-routine
 */

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	JMP	gogo<>(SB)

// gogo<> completes the switch started by runtime·gogo.
// In: BX = gobuf, DX = g to install as the current g.
// It loads SP, AX (ret) and DX (ctxt) from the gobuf, zeroes those
// three gobuf fields, and jumps to gobuf.pc.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument to fn: the g we switched away from
	MOVL	DI, DX	// DX = fn, the same register morestack saves as ctxt
	MOVL	0(DI), DI	// code pointer is the first word of fn
	CALL	DI
	// fn must not return; reaching here means it did.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
//
// Runs fn on the g0 stack. When the current g is already gsignal or g0
// the function is tail-called in place; when it is m->curg the stacks
// are switched around the call; any other g goes to badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	CMPL	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	CMPL	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// DX is saved into g->sched.ctxt as received from the caller.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack_noctxt zeroes DX (the value morestack stores as
// g->sched.ctxt) and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the frame size) is <= MAXSIZE and
// otherwise falls through to whatever follows the macro.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
/*
 * reflectcall entry point and the call<N> frame template
 */

// reflectcall loads frameSize into CX and walks the DISPATCH ladder,
// jumping to the first call<N> whose N is >= frameSize. A frameSize
// above 1073741824 reaches badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-28
	MOVL	frameSize+20(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one WRAPPER function NAME with a MAXSIZE-byte frame.
// It copies stackArgsSize bytes from stackArgs to the frame (REP MOVSB),
// calls through f, then hands callRet<> the registers it expects:
// DX = stackArgsType, DI = stackArgs+stackRetOffset,
// SI = SP+stackRetOffset, CX = stackArgsSize-stackRetOffset.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-28;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	stackArgs+8(FP), SI;		\
	MOVL	stackArgsSize+12(FP), CX;	\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	stackArgsType+0(FP), DX;	\
	MOVL	stackArgs+8(FP), DI;		\
	MOVL	stackArgsSize+12(FP), CX;	\
	MOVL	stackRetOffset+16(FP), BX;	\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*.
// This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// In (as set up by the CALLFN macro): DX, DI, SI, CX are stored to
// 0(SP)..12(SP) in that order; 16(SP) is set to 0.
TEXT callRet<>(SB), NOSPLIT, $20-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	MOVL	$0, 16(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// The single argument is loaded with a 4-byte MOVL from cycles+0(FP),
// so the declared argument size is 4 bytes ($0-4); it was previously
// declared as $0-0, which disagreed with the load below.
// Presumably declared on the Go side as func procyield(cycles uint32) -
// confirm against the Go stub.
// NOTE(review): cycles == 0 wraps on the first SUBL and spins for about
// 2^32 iterations; callers are assumed to pass a positive count.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3.
//    jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
// AX and BX are pushed on entry and popped before returning.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	$runtime·systemstack_switch(SB), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·abort(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX
	MOVL	SP, DX
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DX, 8(SP)	// save old SP
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX
	MOVL	8(SP), DX
	MOVL	DX, SP
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Three paths: no g at all (nosave), already on g0 or gsignal
// (noswitch, entered directly), or a switch to m->g0's stack first.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	nosave	// Don't even have a G yet.
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CMPL	DI, m_gsignal(BP)
	JEQ	noswitch
	CALL	gosave_systemstack_switch<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
nosave:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DX, 4(SP)	// save original stack pointer
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	MOVL	4(SP), CX	// restore original stack pointer
	MOVL	CX, SP
	MOVL	AX, ret+8(FP)
	RET

// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$12-12  // Frame size must match commented places below
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, savedm-4(SP) // saved copy of oldm
	JMP	havem
needm:
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	$0, savedm-4(SP) // dropm on return
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)  // "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVL	fn+0(FP), AX
	MOVL	frame+4(FP), BX
	MOVL	ctxt+8(FP), CX
	LEAL	-(4+12)(DI), SP  // Must match declared frame size
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP  // Must match declared frame size
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI  // Must match declared frame size
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVL	savedm-4(SP), DX
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// On Windows it also updates 0x14(FS): cleared for a nil g,
// otherwise pointed at the g's m->tls.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// abort raises INT $3 and then spins forever; it never returns.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Returns the RDTSC counter (AX = low word, DX = high word).
// Fence before RDTSC: none when the SSE2 flag is not 1, LFENCE when
// lfenceBeforeRdtsc is 1 (set for Intel in rt0_go), MFENCE otherwise.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT ldt0setup<>(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared with flags 0 (not NOSPLIT);
// rt0_go calls it to "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// hash function using AES hardware instructions
// Falls back to memhashFallback when useAeshash is 0.
TEXT runtime·memhash(SB),NOSPLIT,$0-16
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback(SB)

// strhash hashes the string whose header p points at.
// Falls back to strhashFallback when useAeshash is 0.
TEXT runtime·strhash(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback(SB)

// AX: data
// BX: length
// DX: address to put return value
//
// Reached by JMP from memhash/strhash, so h+4(FP) still names the
// caller's seed argument. Dispatches on length: 0, 1-15, 16, 17-32,
// 33-64 and 65+ bytes. Each path stores the low 32 bits of the
// final state to (DX).
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX	// BX*2, then scaled by 8 below: 16-byte table entries
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// memhash32 combines the seed h with the 4 bytes at p and applies three
// AESENC rounds keyed from aeskeysched; the low 32 bits are the result.
// Falls back to memhash32Fallback when useAeshash is 0.
TEXT runtime·memhash32(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
noaes:
	JMP	runtime·memhash32Fallback(SB)

// memhash64 is the 8-byte counterpart of memhash32: the data is loaded
// with MOVQ and the seed inserted as dword 2 before the three rounds.
// Falls back to memhash64Fallback when useAeshash is 0.
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
noaes:
	JMP	runtime·memhash64Fallback(SB)

// simple mask to get rid of data in the high part of the register.
DATA masks<>+0x00(SB)/4, $0x00000000	// n = 0
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff	// n = 1
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff	// n = 2
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff	// n = 3
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff	// n = 4
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff	// n = 5
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff	// n = 6
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff	// n = 7
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff	// n = 8
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff	// n = 9
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff	// n = 10
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff	// n = 11
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff	// n = 12
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff	// n = 13
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff	// n = 14
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff	// n = 15
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// shifts<> holds PSHUFB control vectors. The 16-byte entry at offset 16*n
// moves the top n bytes of a register down to its low n bytes. Every other
// selector byte is 0xff; PSHUFB writes zero for a selector whose high bit
// is set, so the remaining bytes of the result are cleared.
DATA shifts<>+0x00(SB)/4, $0x00000000	// n = 0
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f	// n = 1
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// n = 2
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// n = 3
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// n = 4
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// n = 5
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// n = 6
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// n = 7
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// n = 8
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807	// n = 9
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706	// n = 10
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605	// n = 11
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504	// n = 12
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403	// n = 13
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302	// n = 14
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201	// n = 15
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports (in ret+0(FP)) whether both masks<> and shifts<>
// start on a 16-byte boundary.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), BX
	MOVL	$masks<>(SB), AX
	ORL	BX, AX		// a low bit set in either address survives the OR
	TESTL	$15, AX		// ZF set iff the low 4 bits of both addresses are zero
	SETEQ	ret+0(FP)
	RET

// return0 leaves 0 in AX and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX				// AX = g
	MOVL	g_m(AX), AX				// AX = g.m
	MOVL	m_curg(AX), AX				// AX = g.m.curg
	MOVL	(g_stack+stack_hi)(AX), AX		// AX = g.m.curg.stack.hi
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current list tail
	MOVL	AX, moduledata_next(DX)		// tail.next = new module
	MOVL	AX, runtime·lastmoduledatap(SB)	// new module becomes the tail
	RET

// uint32tofloat64 converts the uint32 at a+0(FP) to a float64 in ret+4(FP).
// The value is widened to a 64-bit integer on the stack (upper half zero)
// and loaded through the x87 unit with the 64-bit integer load.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	$0, 4(SP)	// high 32 bits of the 64-bit integer
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)	// low 32 bits = a
	FMOVV	0(SP), F0	// load the 64-bit integer onto the x87 stack
	FMOVDP	F0, ret+4(FP)	// store it as a float64 and pop
	RET

// float64touint32 converts the float64 at a+0(FP) to a uint32 in ret+8(FP).
// The x87 control word is swapped for runtime·controlWord64trunc around the
// store (presumably selecting truncation, going by the name) and restored
// afterwards. The value is stored as a 64-bit integer and the low 32 bits
// are returned.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)				// save the caller's control word
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)			// store as 64-bit integer and pop
	FLDCW	0(SP)				// restore the caller's control word
	MOVL	4(SP), AX			// low 32 bits of the integer result
	MOVL	AX, ret+8(FP)
	RET

// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI.
// It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
//
// Frame layout (28 bytes):
//    0(SP)  DI, doubling as the first argument to wbBufFlush
//    4(SP)  AX, doubling as the second argument to wbBufFlush
//    8(SP)  DX   (slow path only)
//   12(SP)  BP   (slow path only)
//   16(SP)  SI   (slow path only)
//   20(SP)  CX
//   24(SP)  BX
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
	// Spill the two registers the fast path uses as scratch. Doing it
	// here is slightly faster than having the caller spill them.
	MOVL	BX, 24(SP)
	MOVL	CX, 20(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(BX)
	MOVL	g(BX), BX
	MOVL	g_m(BX), BX
	MOVL	m_p(BX), BX				// BX = g.m.p
	MOVL	(p_wbBuf+wbBuf_next)(BX), CX
	// Claim an 8-byte record: advance wbBuf.next past it.
	LEAL	8(CX), CX
	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
	// Compare the new next with wbBuf.end. The MOVLs below do not touch
	// FLAGS, so the result is still valid at the JEQ.
	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)
	// Fill in the record just claimed.
	MOVL	AX, -8(CX)	// Record value
	MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
	MOVL	BX, -4(CX)	// Record *slot
	// Buffer full? (flags from the CMPL above)
	JEQ	flush
done:
	MOVL	24(SP), BX
	MOVL	20(SP), CX
	// Do the write.
	MOVL	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// BX and CX were already saved on entry.
	MOVL	DX, 8(SP)
	MOVL	BP, 12(SP)
	MOVL	SI, 16(SP)
	// DI and AX are stored in the first two stack slots, where they
	// also serve as the two arguments to wbBufFlush.
	MOVL	DI, 0(SP)
	MOVL	AX, 4(SP)

	CALL	runtime·wbBufFlush(SB)

	MOVL	16(SP), SI
	MOVL	12(SP), BP
	MOVL	8(SP), DX
	MOVL	4(SP), AX
	MOVL	0(SP), DI
	JMP	done

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame.
// These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Each two-argument stub stores one register to x+0(FP) and one to y+4(FP);
// which pair of registers is used differs from stub to stub.

// Index checks: x in AX, y in CX.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndex(SB)

TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndexU(SB)

// Two-index slice checks, Alen/Acap: x in CX, y in DX.
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlen(SB)

TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlenU(SB)

TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcap(SB)

TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcapU(SB)

// Two-index slice checks, B: x in AX, y in CX.
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceB(SB)

TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceBU(SB)

// Three-index slice checks, Alen/Acap: x in DX, y in BX.
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Alen(SB)

TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)

TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Acap(SB)

TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)

// Three-index slice checks, B: x in CX, y in DX.
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3B(SB)

TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3BU(SB)

// Three-index slice checks, C: x in AX, y in CX.
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3C(SB)

TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3CU(SB)

// Slice conversion check: x in DX, y in BX.
TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSliceConvert(SB)

// Extended versions for 64-bit indexes.
// The index arrives as two 32-bit halves: SI is stored to hi+0(FP) and a
// second register to lo+4(FP); y goes to y+8(FP).

// Index checks: hi in SI, lo in AX, y in CX.
TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndex(SB)

TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndexU(SB)

// Two-index slice checks, Alen/Acap: hi in SI, lo in CX, y in DX.
TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlen(SB)

TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlenU(SB)

TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcap(SB)

TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcapU(SB)

// Two-index slice checks, B: hi in SI, lo in AX, y in CX.
TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceB(SB)

TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceBU(SB)

// Three-index slice checks, Alen/Acap: hi in SI, lo in DX, y in BX.
TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Alen(SB)

TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AlenU(SB)

TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Acap(SB)

TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AcapU(SB)

// Three-index slice checks, B: hi in SI, lo in CX, y in DX.
TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3B(SB)

TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3BU(SB)

// Three-index slice checks, C: hi in SI, lo in AX, y in CX.
TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3C(SB)

TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3CU(SB)

#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// runtime·tls_g is a 4-byte value initialized to 8.
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif