// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	// Repack argc/argv into the two frame slots that rt0_go reads
	// from 0(SP) and 4(SP).
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)

// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	// Hand-built frame: save BP, BX, SI, DI; they are restored in
	// reverse order at the restore label below.
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash argc/argv in file-local globals for _rt0_386_lib_go.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Two argument slots, used by either call path below.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)
	MOVL	$0, 4(SP)

	CALL	AX

	// Undo the alignment.
	MOVL	BP, SP

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET

// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)
	JMP	runtime·rt0_go(SB)

// argc and argv captured by _rt0_386_lib and consumed by _rt0_386_lib_go.
DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4

// runtime·rt0_go is the common bootstrap reached (by JMP) from the
// _rt0_386* entry points with argc at 0(SP) and argv at 4(SP).
// In order, it: sets default stack bounds for g0, verifies CPUID and
// MMX support, sets up TLS (through _cgo_init when present, otherwise
// through ldt0setup), links m0 and g0, runs check/args/osinit/schedinit,
// queues runtime·main via newproc and enters mstart.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP	// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)	// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid

bad_proc: // show that the program requires MMX.
	// write(2, bad_proc_msg, 0x3d); exit(1); abort if exit returns.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
#ifdef GOOS_android
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+8).
	MOVL	-8(TLS), BX
	MOVL	BX, 12(SP)
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	$0, BX
	MOVL	BX, 12(SP)	// arg 3,4: not used when using platform's TLS
	MOVL	BX, 8(SP)
#endif
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// arg 2: setg_gcc
	MOVL	BP, 0(SP)	// arg 1: g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_openbsd
	// skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
	JMP	ok
#endif
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	ldt0setup<>(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	CALL	runtime·newproc(SB)
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)
	RET

// Message written to fd 2 by the bad_proc path of rt0_go; 61 (0x3d) bytes.
DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
GLOBL	bad_proc_msg<>(SB), RODATA, $61

// mainPC is a read-only word holding the address of runtime·main.
// rt0_go pushes the address of this word as the argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes a breakpoint trap (INT $3) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

// mstart is a TOPFRAME wrapper that calls mstart0.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	CALL	runtime·mstart0(SB)
	RET // not reached

/*
 *  go-routine
 */

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX	// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX	// make sure g != nil
	JMP	gogo<>(SB)

// gogo<> finishes the switch started by runtime·gogo.
// In: BX = gobuf, DX = gobuf.g. Installs g in TLS, restores SP, AX (ret)
// and DX (ctxt) from the gobuf, clears those gobuf fields, and jumps to
// gobuf.pc.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument to fn: the g we switched away from
	MOVL	DI, DX	// DX = closure context for fn
	MOVL	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI
	POPL	AX
	// fn returned, which it must never do: report via badmcall2.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the g0 stack. If already on g0 or gsignal, fn is tail-called
// in place. If the current g is m->curg, the stack is switched to g0 for
// the duration of the call. Any other g is reported via badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	CMPL	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	CMPL	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack_noctxt zeroes DX, which morestack stores into g->sched.ctxt,
// and then tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the requested frame size) is at most
// MAXSIZE (unsigned compare); otherwise it falls through.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// ·reflectcall loads frameSize into CX and dispatches to the first
// runtime·callNNN whose fixed frame size is >= frameSize. Sizes above
// 1073741824 fall through to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-28
	MOVL	frameSize+20(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper NAME with a MAXSIZE-byte
// frame. It copies stackArgsSize bytes from stackArgs to the frame,
// calls the code pointer stored in f with DX = f, then passes the
// region at and after stackRetOffset to callRet for copying back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-28;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	stackArgs+8(FP), SI;		\
	MOVL	stackArgsSize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX; 			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	stackArgsType+0(FP), DX;		\
	MOVL	stackArgs+8(FP), DI;		\
	MOVL	stackArgsSize+12(FP), CX;		\
	MOVL	stackRetOffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*.
// This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
// In: DX, DI, SI, CX, stored in that order as the first four arguments
// of reflectcallmove; the fifth argument slot is set to 0.
TEXT callRet<>(SB), NOSPLIT, $20-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	MOVL	$0, 16(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE in a loop, decrementing the cycles argument
// until it reaches zero.
// NOTE(review): the frame is declared $0-0 although a 4-byte argument is
// read from cycles+0(FP) - confirm whether $0-4 is intended.
// NOTE(review): cycles == 0 wraps the counter and spins about 2^32
// times - callers presumably always pass cycles > 0; verify.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
	// AX and BX are used as scratch and preserved for the caller.
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	$runtime·systemstack_switch(SB), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·abort(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX
	MOVL	SP, DX
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DX, 8(SP)	// save old SP
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX
	MOVL	8(SP), DX
	MOVL	DX, SP
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	get_tls(CX)
	MOVL	g(CX), DI
	CMPL	DI, $0
	JEQ	nosave	// Don't even have a G yet.
	MOVL	g_m(DI), BP
	CMPL	DI, m_gsignal(BP)
	JEQ	noswitch
	MOVL	m_g0(BP), SI
	CMPL	DI, SI
	JEQ	noswitch
	CALL	gosave_systemstack_switch<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
nosave:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DX, 4(SP)	// save original stack pointer
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	MOVL	4(SP), CX	// restore original stack pointer
	MOVL	CX, SP
	MOVL	AX, ret+8(FP)
	RET

// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$12-12	// Frame size must match commented places below
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)	// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, savedm-4(SP)	// saved copy of oldm
	JMP	havem
needm:
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	$0, savedm-4(SP)	// dropm on return
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)	// "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVL	fn+0(FP), AX
	MOVL	frame+4(FP), BX
	MOVL	ctxt+8(FP), CX
	LEAL	-(4+12)(DI), SP	// Must match declared frame size
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP	// Must match declared frame size
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI	// Must match declared frame size
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVL	savedm-4(SP), DX
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	// On Windows, 0x14(FS) is also updated: it is zeroed for a nil g,
	// otherwise set to &g.m.tls.
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// abort traps with INT $3 and, if execution continues past the trap,
// spins forever.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Returns the time-stamp counter (DX:AX) read by RDTSCP when available,
// otherwise by RDTSC, preceded by MFENCE;LFENCE unless GO386_softfloat
// is defined.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	// LFENCE/MFENCE instruction support is dependent on SSE2.
	// When no SSE2 support is present do not enforce any serialization
	// since using CPUID to serialize the instruction stream is
	// very costly.
#ifdef GO386_softfloat
	JMP	rdtsc	// no fence instructions available
#endif
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores to globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
rdtsc:
	RDTSC
	JMP	done

// ldt0setup calls setldt(7, &m0.tls, 32) so that GS-based TLS refers
// to m0.tls.
TEXT ldt0setup<>(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT; rt0_go calls
// it with the comment "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// hash function using AES hardware instructions
// memhash hashes s bytes at p. When runtime·useAeshash is zero it
// tail-jumps to memhashFallback; otherwise it enters aeshashbody with
// AX = data, BX = size, DX = &ret.
TEXT runtime·memhash(SB),NOSPLIT,$0-16
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback(SB)

// strhash hashes the string object pointed to by p. When
// runtime·useAeshash is zero it tail-jumps to strhashFallback; otherwise
// it enters aeshashbody with AX = string data, BX = length, DX = &ret.
TEXT runtime·strhash(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback(SB)

// AX: data
// BX: length
// DX: address to put return value
// Entered by JMP from memhash/strhash, so h+4(FP) below presumably still
// refers to the seed argument of those functions - confirm.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	// 16 bits of length
	PSHUFHW	$0, X0, X0	// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1	// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0	// scramble seed

	// Dispatch on length (unsigned compares).
	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1	// scramble input, xor in seed
	AESENC	X1, X1	// scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// memhash32 hashes the 4 bytes at p with seed h. When
// runtime·useAeshash is zero it tail-jumps to memhash32Fallback;
// otherwise it applies three AESENC rounds keyed from runtime·aeskeysched.
TEXT runtime·memhash32(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
noaes:
	JMP	runtime·memhash32Fallback(SB)

// memhash64 hashes the 8 bytes at p with seed h. When
// runtime·useAeshash is zero it tail-jumps to memhash64Fallback;
// otherwise it applies three AESENC rounds keyed from runtime·aeskeysched.
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
noaes:
	JMP	runtime·memhash64Fallback(SB)

// simple mask to get rid of data in the high part of the register.
// The table has 16 entries of 16 bytes each; the entry at offset 16*i
// has its low i bytes set to 0xff and the remaining bytes zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb.  They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Each entry is 16 bytes.  A control byte of 0xff has its top bit set,
// which makes pshufb write zero to that destination byte.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM stores 1 in its one-byte result when the addresses of both
// tables have their low four bits clear, and 0 otherwise.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX	// a low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// return0 sets AX to zero and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Only AX and CX are written; the result is left in AX.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX
	MOVL	AX, moduledata_next(DX)	// link the new module after the current last one
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new module becomes the last one
	RET

// uint32tofloat64 converts the 32-bit argument at 0(FP) to a float64
// stored at 4(FP).  The argument is widened to a 64-bit integer with a
// zero high word in the local frame and loaded through the x87 stack.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)	// high word is zero
	FMOVV	0(SP), F0
	FMOVDP	F0, ret+4(FP)
	RET

// float64touint32 converts the float64 argument at 0(FP) to a 32-bit
// result stored at 8(FP).  The x87 control word is saved at 0(SP),
// replaced by runtime·controlWord64trunc for the 64-bit integer store
// to 4(SP), and restored afterwards; the low word of that store is
// the result.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)
	FLDCW	0(SP)
	MOVL	4(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI.
// It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
//
// Local frame (28 bytes): 0(SP)..16(SP) hold DI, AX, DX, BP and SI
// around the flush call; 20(SP) and 24(SP) hold CX and BX.
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVL	CX, 20(SP)
	MOVL	BX, 24(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(BX)
	MOVL	g(BX), BX
	MOVL	g_m(BX), BX
	MOVL	m_p(BX), BX
	MOVL	(p_wbBuf+wbBuf_next)(BX), CX
	// Increment wbBuf.next position.
	// Each record is 8 bytes: the value, then the old *slot.
	LEAL	8(CX), CX
	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)
	// Record the write.
	// The MOVLs below leave the flags from CMPL intact for the JEQ.
	MOVL	AX, -8(CX)	// Record value
	MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
	MOVL	BX, -4(CX)	// Record *slot
	// Is the buffer full? (flags set in CMPL above)
	JEQ	flush
ret:
	MOVL	20(SP), CX
	MOVL	24(SP), BX
	// Do the write.
	MOVL	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	MOVL	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVL	AX, 4(SP)	// Also second argument to wbBufFlush
	// BX already saved
	// CX already saved
	MOVL	DX, 8(SP)
	MOVL	BP, 12(SP)
	MOVL	SI, 16(SP)
	// DI already saved

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	MOVL	0(SP), DI
	MOVL	4(SP), AX
	MOVL	8(SP), DX
	MOVL	12(SP), BP
	MOVL	16(SP), SI
	JMP	ret

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame.
// These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Each stub below stores its first register at x+0(FP) and its second
// at y+4(FP), then jumps to the runtime·goPanic* function of the same
// name.  The register pair differs from stub to stub.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3CU(SB)
TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSliceConvert(SB)

// Extended versions for 64-bit indexes.
// The index is stored as two words: SI at hi+0(FP) and a second
// register at lo+4(FP).  The remaining argument goes to y+8(FP).
TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndex(SB)
TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndexU(SB)
TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlen(SB)
TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlenU(SB)
TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcap(SB)
TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcapU(SB)
TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceB(SB)
TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceBU(SB)
TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Alen(SB)
TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AlenU(SB)
TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Acap(SB)
TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AcapU(SB)
TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3B(SB)
TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3BU(SB)
TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3C(SB)
TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3CU(SB)

#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif