// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)

// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	// Build a C-style frame and save the registers restored at "restore".
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash argc/argv for _rt0_386_lib_go, which runs on another thread.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Reserve two argument words for the thread-creation call below.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)
	MOVL	$0, 4(SP)

	CALL	AX

	// Undo the alignment adjustment.
	MOVL	BP, SP

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET

// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)
	JMP	runtime·rt0_go(SB)

// argc and argv captured by _rt0_386_lib for _rt0_386_lib_go.
DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4

// rt0_go is the common bootstrap: it expects argc at 0(SP) and argv at
// 4(SP), sets up g0/m0 and TLS, checks the CPU, initializes the runtime
// and starts the first goroutine at runtime·mainPC. It does not return.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)	// 61 == size of bad_proc_msg
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
#ifdef GOOS_android
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+8).
	MOVL	-8(TLS), BX
	MOVL	BX, 12(SP)
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	$0, BX
	MOVL	BX, 12(SP)	// arg 3,4: not used when using platform's TLS
	MOVL	BX, 8(SP)
#endif
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// arg 2: setg_gcc
	MOVL	BP, 0(SP)	// arg 1: g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	ldt0setup<>(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)
	RET

DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
GLOBL	bad_proc_msg<>(SB), RODATA, $61

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a debugger trap (INT $3) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument for fn: the g we switched away from
	MOVL	DI, DX	// DX = closure pointer for the call below
	MOVL	0(DI), DI
	CALL	DI
	// fn returned, which it must never do: report via badmcall2.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the m->g0 stack. If the caller is already on g0 or gsignal,
// fn is tail-called directly; otherwise the current g must be m->curg.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	CMPL	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	CMPL	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack_noctxt is morestack with the context register (DX) cleared.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is <= MAXSIZE
// (unsigned compare); otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// argsize exceeds the largest fixed frame.
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call function: it copies argsize bytes
// of arguments into its frame, calls f, then hands the result region
// (from retoffset onward) to callRet to be copied back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE cycles times.
// The frame declares the 4-byte cycles argument that the body reads.
// NOTE: the counter is decremented before it is tested, so cycles == 0
// wraps around instead of returning immediately.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved across the call.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	nosave	// Don't even have a G yet.
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CMPL	DI, m_gsignal(BP)
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
nosave:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DX, 4(SP)	// save original stack pointer
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	MOVL	4(SP), CX	// restore original stack pointer
	MOVL	CX, SP
	MOVL	AX, ret+8(FP)
	RET

// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$12-12  // Frame size must match commented places below
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)	// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, savedm-4(SP)	// saved copy of oldm
	JMP	havem
needm:
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	$0, savedm-4(SP)	// dropm on return
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)	// "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVL	fn+0(FP), AX
	MOVL	frame+4(FP), BX
	MOVL	ctxt+8(FP), CX
	LEAL	-(4+12)(DI), SP	// Must match declared frame size
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP	// Must match declared frame size
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI	// Must match declared frame size
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVL	savedm-4(SP), DX
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// abort traps with INT $3 and then spins forever; it never returns.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop

// check that SP is in range (g->stack.lo, g->stack.hi)
// Both comparisons are strict: SP equal to either bound aborts.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
// Returns the RDTSC counter. When SSE2 is available the read is preceded
// by LFENCE (if lfenceBeforeRdtsc is set) or MFENCE.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT ldt0setup<>(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is not NOSPLIT (flags are 0), and rt0_go
// calls it so that a wrong stack check faults early.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// hash function using AES hardware instructions
TEXT runtime·memhash(SB),NOSPLIT,$0-16
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback(SB)

// strhash hashes the string whose header p points to, using the same
// AES body as memhash when useAeshash is set.
TEXT runtime·strhash(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback(SB)

// AX: data
// BX: length
// DX: address to put return value
// Reached by JMP from memhash/strhash, so h+4(FP) is the caller's seed.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	// 16 bits of length
	PSHUFHW	$0, X0, X0	// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1	// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0	// scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1	// scramble input, xor in seed
	AESENC	X1, X1	// scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// memhash32 hashes the 4 bytes at p with seed h; it uses three AESENC
// rounds when useAeshash is set and memhash32Fallback otherwise.
TEXT runtime·memhash32(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
noaes:
	JMP	runtime·memhash32Fallback(SB)

// memhash64 hashes the 8 bytes at p with seed h; it uses three AESENC
// rounds when useAeshash is set and memhash64Fallback otherwise.
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
noaes:
	JMP	runtime·memhash64Fallback(SB)

// simple mask to get rid of data in the high part of the register.
// Entry n (the 16 bytes at offset n*16, n = 0..15) has its low n bytes
// set to 0xff and the remaining bytes zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL
$masks<>(SB), AX 1292 MOVL $shifts<>(SB), BX 1293 ORL BX, AX 1294 TESTL $15, AX 1295 SETEQ ret+0(FP) 1296 RET 1297 1298 TEXT runtime·return0(SB), NOSPLIT, $0 1299 MOVL $0, AX 1300 RET 1301 1302 // Called from cgo wrappers, this function returns g->m->curg.stack.hi. 1303 // Must obey the gcc calling convention. 1304 TEXT _cgo_topofstack(SB),NOSPLIT,$0 1305 get_tls(CX) 1306 MOVL g(CX), AX 1307 MOVL g_m(AX), AX 1308 MOVL m_curg(AX), AX 1309 MOVL (g_stack+stack_hi)(AX), AX 1310 RET 1311 1312 // The top-most function running on a goroutine 1313 // returns to goexit+PCQuantum. 1314 TEXT runtime·goexit(SB),NOSPLIT,$0-0 1315 BYTE $0x90 // NOP 1316 CALL runtime·goexit1(SB) // does not return 1317 // traceback from goexit1 must hit code range of goexit 1318 BYTE $0x90 // NOP 1319 1320 // Add a module's moduledata to the linked list of moduledata objects. This 1321 // is called from .init_array by a function generated in the linker and so 1322 // follows the platform ABI wrt register preservation -- it only touches AX, 1323 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments: 1324 // instead the pointer to the moduledata is passed in AX. 1325 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 1326 MOVL runtime·lastmoduledatap(SB), DX 1327 MOVL AX, moduledata_next(DX) 1328 MOVL AX, runtime·lastmoduledatap(SB) 1329 RET 1330 1331 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12 1332 MOVL a+0(FP), AX 1333 MOVL AX, 0(SP) 1334 MOVL $0, 4(SP) 1335 FMOVV 0(SP), F0 1336 FMOVDP F0, ret+4(FP) 1337 RET 1338 1339 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12 1340 FMOVD a+0(FP), F0 1341 FSTCW 0(SP) 1342 FLDCW runtime·controlWord64trunc(SB) 1343 FMOVVP F0, 4(SP) 1344 FLDCW 0(SP) 1345 MOVL 4(SP), AX 1346 MOVL AX, ret+8(FP) 1347 RET 1348 1349 // gcWriteBarrier performs a heap pointer write and informs the GC. 1350 // 1351 // gcWriteBarrier does NOT follow the Go ABI. 
It takes two arguments: 1352 // - DI is the destination of the write 1353 // - AX is the value being written at DI 1354 // It clobbers FLAGS. It does not clobber any general-purpose registers, 1355 // but may clobber others (e.g., SSE registers). 1356 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28 1357 // Save the registers clobbered by the fast path. This is slightly 1358 // faster than having the caller spill these. 1359 MOVL CX, 20(SP) 1360 MOVL BX, 24(SP) 1361 // TODO: Consider passing g.m.p in as an argument so they can be shared 1362 // across a sequence of write barriers. 1363 get_tls(BX) 1364 MOVL g(BX), BX 1365 MOVL g_m(BX), BX 1366 MOVL m_p(BX), BX 1367 MOVL (p_wbBuf+wbBuf_next)(BX), CX 1368 // Increment wbBuf.next position. 1369 LEAL 8(CX), CX 1370 MOVL CX, (p_wbBuf+wbBuf_next)(BX) 1371 CMPL CX, (p_wbBuf+wbBuf_end)(BX) 1372 // Record the write. 1373 MOVL AX, -8(CX) // Record value 1374 MOVL (DI), BX // TODO: This turns bad writes into bad reads. 1375 MOVL BX, -4(CX) // Record *slot 1376 // Is the buffer full? (flags set in CMPL above) 1377 JEQ flush 1378 ret: 1379 MOVL 20(SP), CX 1380 MOVL 24(SP), BX 1381 // Do the write. 1382 MOVL AX, (DI) 1383 RET 1384 1385 flush: 1386 // Save all general purpose registers since these could be 1387 // clobbered by wbBufFlush and were not saved by the caller. 1388 MOVL DI, 0(SP) // Also first argument to wbBufFlush 1389 MOVL AX, 4(SP) // Also second argument to wbBufFlush 1390 // BX already saved 1391 // CX already saved 1392 MOVL DX, 8(SP) 1393 MOVL BP, 12(SP) 1394 MOVL SI, 16(SP) 1395 // DI already saved 1396 1397 // This takes arguments DI and AX 1398 CALL runtime·wbBufFlush(SB) 1399 1400 MOVL 0(SP), DI 1401 MOVL 4(SP), AX 1402 MOVL 8(SP), DX 1403 MOVL 12(SP), BP 1404 MOVL 16(SP), SI 1405 JMP ret 1406 1407 // Note: these functions use a special calling convention to save generated code space. 1408 // Arguments are passed in registers, but the space for those arguments are allocated 1409 // in the caller's stack frame. 
These stubs write the args into that stack space and 1410 // then tail call to the corresponding runtime handler. 1411 // The tail call makes these stubs disappear in backtraces. 1412 TEXT runtime·panicIndex(SB),NOSPLIT,$0-8 1413 MOVL AX, x+0(FP) 1414 MOVL CX, y+4(FP) 1415 JMP runtime·goPanicIndex(SB) 1416 TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8 1417 MOVL AX, x+0(FP) 1418 MOVL CX, y+4(FP) 1419 JMP runtime·goPanicIndexU(SB) 1420 TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8 1421 MOVL CX, x+0(FP) 1422 MOVL DX, y+4(FP) 1423 JMP runtime·goPanicSliceAlen(SB) 1424 TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8 1425 MOVL CX, x+0(FP) 1426 MOVL DX, y+4(FP) 1427 JMP runtime·goPanicSliceAlenU(SB) 1428 TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8 1429 MOVL CX, x+0(FP) 1430 MOVL DX, y+4(FP) 1431 JMP runtime·goPanicSliceAcap(SB) 1432 TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8 1433 MOVL CX, x+0(FP) 1434 MOVL DX, y+4(FP) 1435 JMP runtime·goPanicSliceAcapU(SB) 1436 TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8 1437 MOVL AX, x+0(FP) 1438 MOVL CX, y+4(FP) 1439 JMP runtime·goPanicSliceB(SB) 1440 TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8 1441 MOVL AX, x+0(FP) 1442 MOVL CX, y+4(FP) 1443 JMP runtime·goPanicSliceBU(SB) 1444 TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8 1445 MOVL DX, x+0(FP) 1446 MOVL BX, y+4(FP) 1447 JMP runtime·goPanicSlice3Alen(SB) 1448 TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8 1449 MOVL DX, x+0(FP) 1450 MOVL BX, y+4(FP) 1451 JMP runtime·goPanicSlice3AlenU(SB) 1452 TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8 1453 MOVL DX, x+0(FP) 1454 MOVL BX, y+4(FP) 1455 JMP runtime·goPanicSlice3Acap(SB) 1456 TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8 1457 MOVL DX, x+0(FP) 1458 MOVL BX, y+4(FP) 1459 JMP runtime·goPanicSlice3AcapU(SB) 1460 TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8 1461 MOVL CX, x+0(FP) 1462 MOVL DX, y+4(FP) 1463 JMP runtime·goPanicSlice3B(SB) 1464 TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8 1465 MOVL CX, x+0(FP) 1466 MOVL DX, y+4(FP) 1467 JMP 
runtime·goPanicSlice3BU(SB) 1468 TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8 1469 MOVL AX, x+0(FP) 1470 MOVL CX, y+4(FP) 1471 JMP runtime·goPanicSlice3C(SB) 1472 TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8 1473 MOVL AX, x+0(FP) 1474 MOVL CX, y+4(FP) 1475 JMP runtime·goPanicSlice3CU(SB) 1476 1477 // Extended versions for 64-bit indexes. 1478 TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12 1479 MOVL SI, hi+0(FP) 1480 MOVL AX, lo+4(FP) 1481 MOVL CX, y+8(FP) 1482 JMP runtime·goPanicExtendIndex(SB) 1483 TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12 1484 MOVL SI, hi+0(FP) 1485 MOVL AX, lo+4(FP) 1486 MOVL CX, y+8(FP) 1487 JMP runtime·goPanicExtendIndexU(SB) 1488 TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12 1489 MOVL SI, hi+0(FP) 1490 MOVL CX, lo+4(FP) 1491 MOVL DX, y+8(FP) 1492 JMP runtime·goPanicExtendSliceAlen(SB) 1493 TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12 1494 MOVL SI, hi+0(FP) 1495 MOVL CX, lo+4(FP) 1496 MOVL DX, y+8(FP) 1497 JMP runtime·goPanicExtendSliceAlenU(SB) 1498 TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12 1499 MOVL SI, hi+0(FP) 1500 MOVL CX, lo+4(FP) 1501 MOVL DX, y+8(FP) 1502 JMP runtime·goPanicExtendSliceAcap(SB) 1503 TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12 1504 MOVL SI, hi+0(FP) 1505 MOVL CX, lo+4(FP) 1506 MOVL DX, y+8(FP) 1507 JMP runtime·goPanicExtendSliceAcapU(SB) 1508 TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12 1509 MOVL SI, hi+0(FP) 1510 MOVL AX, lo+4(FP) 1511 MOVL CX, y+8(FP) 1512 JMP runtime·goPanicExtendSliceB(SB) 1513 TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12 1514 MOVL SI, hi+0(FP) 1515 MOVL AX, lo+4(FP) 1516 MOVL CX, y+8(FP) 1517 JMP runtime·goPanicExtendSliceBU(SB) 1518 TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12 1519 MOVL SI, hi+0(FP) 1520 MOVL DX, lo+4(FP) 1521 MOVL BX, y+8(FP) 1522 JMP runtime·goPanicExtendSlice3Alen(SB) 1523 TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12 1524 MOVL SI, hi+0(FP) 1525 MOVL DX, lo+4(FP) 1526 MOVL BX, y+8(FP) 1527 JMP 
runtime·goPanicExtendSlice3AlenU(SB) 1528 TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12 1529 MOVL SI, hi+0(FP) 1530 MOVL DX, lo+4(FP) 1531 MOVL BX, y+8(FP) 1532 JMP runtime·goPanicExtendSlice3Acap(SB) 1533 TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12 1534 MOVL SI, hi+0(FP) 1535 MOVL DX, lo+4(FP) 1536 MOVL BX, y+8(FP) 1537 JMP runtime·goPanicExtendSlice3AcapU(SB) 1538 TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12 1539 MOVL SI, hi+0(FP) 1540 MOVL CX, lo+4(FP) 1541 MOVL DX, y+8(FP) 1542 JMP runtime·goPanicExtendSlice3B(SB) 1543 TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12 1544 MOVL SI, hi+0(FP) 1545 MOVL CX, lo+4(FP) 1546 MOVL DX, y+8(FP) 1547 JMP runtime·goPanicExtendSlice3BU(SB) 1548 TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12 1549 MOVL SI, hi+0(FP) 1550 MOVL AX, lo+4(FP) 1551 MOVL CX, y+8(FP) 1552 JMP runtime·goPanicExtendSlice3C(SB) 1553 TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12 1554 MOVL SI, hi+0(FP) 1555 MOVL AX, lo+4(FP) 1556 MOVL CX, y+8(FP) 1557 JMP runtime·goPanicExtendSlice3CU(SB) 1558 1559 #ifdef GOOS_android 1560 // Use the free TLS_SLOT_APP slot #2 on Android Q. 1561 // Earlier androids are set up in gcc_android.c. 1562 DATA runtime·tls_g+0(SB)/4, $8 1563 GLOBL runtime·tls_g+0(SB), NOPTR, $4 1564 #endif