// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
//
// It copies argc and the address of argv into its own 8-byte frame and
// jumps (does not call) to runtime·rt0_go, which reads them from 0(SP)/4(SP).
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)

// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
//
// It records argc/argv in static slots, runs libpreinit synchronously,
// then starts _rt0_386_lib_go on a new thread: through
// _cgo_sys_thread_create when that pointer is non-zero, otherwise
// through runtime·newosproc0.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	// BP, BX, SI and DI are pushed here and popped again at restore.
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash the incoming argc (8(BP)) and argv (12(BP)) for _rt0_386_lib_go.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Room for the two outgoing argument words used on either path below.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	// Arguments for the thread-create hook: (_rt0_386_lib_go, 0).
	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)
	MOVL	$0, 4(SP)

	CALL	AX

	// Undo the alignment adjustment.
	MOVL	BP, SP

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET

// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
// It reloads the argc/argv saved by _rt0_386_lib into its frame and
// jumps to runtime·rt0_go.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)
	JMP	runtime·rt0_go(SB)

// Static storage for the argc/argv captured by _rt0_386_lib.
DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4

// rt0_go is the common bootstrap reached (by JMP) from the _rt0_386*
// entry points with argc at 0(SP) and argv at 4(SP). In order it:
// sets provisional stack bounds on g0, probes the CPU with CPUID
// (exiting with bad_proc_msg if MMX is missing), sets up TLS either via
// _cgo_init or ldt0setup, links m0 and g0, runs check/args/osinit/
// schedinit, queues runtime·main through newproc and calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP	// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)	// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	// The provisional low bound is 64KB (less 104 bytes) below the current SP.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	// first see if CPUID instruction is supported.
	// Two copies of EFLAGS are pushed: one is modified and reloaded,
	// the other is kept for comparison and for the final restore.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX	// AX = bits that differ from the saved EFLAGS
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid

bad_proc: // show that the program requires MMX.
	// write(2, bad_proc_msg, 0x3d); 0x3d == 61, the size declared for bad_proc_msg.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	// exit(1); abort is the backstop if exit returns.
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)

has_cpuid:
	// CPUID with AX=0. The AX result is kept in SI; if it is zero the
	// vendor and feature queries below are skipped.
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
#ifdef GOOS_android
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+8).
	MOVL	-8(TLS), BX
	MOVL	BX, 12(SP)
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	$0, BX
	MOVL	BX, 12(SP)	// arg 3,4: not used when using platform's TLS
	MOVL	BX, 8(SP)
#endif
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// arg 2: setg_gcc
	MOVL	BP, 0(SP)	// arg 1: g0
	CALL	AX

	// update stackguard after _cgo_init
	// Both guards become g0.stack.lo + _StackGuard.
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif
#ifdef GOOS_darwin
	// skip runtime·ldt0setup(SB) on Darwin
	JMP	ok
#endif

	// set up %gs
	CALL	ldt0setup<>(SB)

	// store through it, to make sure it works
	// A marker written through the TLS g slot must read back from m0.tls.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	// Discard the two words pushed for newproc.
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	// Reached only if mstart returns.
	CALL	runtime·abort(SB)
	RET

// Message written to fd 2 by the bad_proc path of rt0_go (61 bytes).
DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
GLOBL	bad_proc_msg<>(SB), RODATA, $61

// mainPC holds the address of runtime·main; rt0_go passes &mainPC to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3 and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP, PC and the current g in the Gobuf, zeroes
// gobuf.ret, and calls badctxt if gobuf.ctxt is non-zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)			// ctxt == 0: skip the badctxt call
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Installs gobuf.g as the current g, loads SP, AX (ret) and DX (ctxt)
// from the Gobuf, zeroes those three Gobuf fields, and jumps to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI		// DI = fn (closure pointer)

	// Record where the calling goroutine resumes: g->sched.{pc,sp,g}.
	get_tls(DX)
	MOVL	g(DX), AX		// AX = current g
	MOVL	0(SP), BX		// return address of our caller
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX		// SP as seen by our caller
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Locate m->g0; calling mcall while already on g0 is an error.
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI		// SI = m->g0
	CMPL	SI, AX
	JNE	switchtog0
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX

switchtog0:
	MOVL	SI, g(DX)		// current g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// continue on g0's saved stack pointer
	PUSHL	AX			// argument for fn: the g we came from
	MOVL	DI, DX			// closure context in DX
	MOVL	0(DI), DI		// code pointer is the first word of the closure
	CALL	DI
	// fn is not supposed to come back; report it if it does.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a placeholder function whose address
// systemstack stores as the saved PC of the G it switches away from.
// It has to be a different function from the one that sits at the top
// of the system stack, because that one ends the stack walk
// (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the system (g0) stack. When the caller is already on g0 or
// gsignal, fn is tail-called in place; when the caller is m->curg, the
// stacks are switched around the call; any other g is an error.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI		// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX		// AX = g
	MOVL	g_m(AX), BX		// BX = m

	CMPL	AX, m_gsignal(BX)	// running on the signal g?
	JEQ	onsystemstack

	MOVL	m_g0(BX), DX		// DX = g0
	CMPL	AX, DX			// running on g0?
	JEQ	onsystemstack

	CMPL	AX, m_curg(BX)		// otherwise we must be the user g
	JNE	unknowng

	// Save the user g's context. The saved PC is systemstack_switch so a
	// scan of the G stack sees that frame rather than this one.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Make g0 the current g.
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// Plant mstart as a return address just below g0's saved SP so the
	// traceback stops there, as if mstart had called systemstack.
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// Invoke fn: context in DX, code pointer from the closure's first word.
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// Return to m->curg and its saved stack pointer.
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

onsystemstack:
	// Already on a system stack: jump straight into fn. The tail call
	// keeps an intermediate systemstack frame out of tracebacks.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

unknowng:
	// g is none of gsignal, g0 or curg.
	// The indirect call hides it from the linker's nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// The scheduler stack (m->g0) cannot grow.
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX		// BX = m
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	notg0
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

notg0:
	// The signal stack cannot grow either.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	notgsignal
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

notgsignal:
	// We were called from some function f.
	// m->morebuf describes f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVL	4(SP), DI		// PC in f's caller
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX		// SP of f's caller
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI		// SI = current g
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// g->sched describes f itself; DX carries the closure context.
	MOVL	0(SP), AX		// PC in f
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX		// SP of f
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX		// touch the slot CALL will write, so a fault happens before SP is replaced
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// newstack must not return
	RET

// morestack_noctxt is morestack for callers without a closure context:
// it zeroes DX and continues in morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the size in CX is at most MAXSIZE
// (unsigned compare); otherwise it falls through to the next statement.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflectcall loads argsize into CX and walks the DISPATCH ladder: the
// first callN whose frame size is >= argsize (unsigned) is jumped to.
// Sizes above 1073741824 fall through to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// No frame size was large enough.
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame trampoline NAME with a MAXSIZE-byte
// frame. It copies argsize bytes from argptr to the bottom of its frame,
// calls the function held in the closure f, then hands the bytes from
// retoffset onward to callRet (DX=argtype, DI=destination, SI=source on
// this frame, CX=byte count) to be copied back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX; 			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	/* skip the argument part: advance both pointers by retoffset, shrink the count */	\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*.
// This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers:
//	DX, DI, SI, CX are stored, in that order, as the four argument
//	words of runtime·reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// One trampoline per frame size targeted by reflectcall's DISPATCH ladder.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// The frame is declared $0-4 because the body reads the 4-byte
// argument cycles+0(FP); the previous $0-0 declared no argument area.
// The counter is decremented before it is tested, so callers are
// expected to pass cycles >= 1.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// publicationBarrier has an empty body on this architecture.
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the caller's
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3.
//    jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX	// code pointer is the first word of the closure
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// Stores the caller's SP and PC in the current g's sched buffer, zeroes
// sched.ret, and calls badctxt if sched.ctxt is non-zero.
// AX and BX are saved on entry and restored before returning.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX	// caller's SP
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX	// the word just below it: our return address
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)		// ctxt == 0: skip the badctxt call
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Three entry situations are handled: no g at all (nosave), already on
// g0 or gsignal (noswitch, no state saved), and an ordinary g (state
// saved with gosave<>, then switch to g0). fn's AX result is stored in ret.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX		// DX = SP on entry

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	nosave	// Don't even have a G yet.
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CMPL	DI, m_gsignal(BP)
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	// Here DI is still the g we entered with and DX its entry SP.
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	// SP is recomputed as g.stack.hi minus the saved depth.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
nosave:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DX, 4(SP)	// save original stack pointer
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	MOVL	4(SP), CX	// restore original stack pointer
	MOVL	CX, SP
	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX	// address of the fn argument slot, not fn itself
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
//
// Register use below: BP = m, DX = the m found on entry (0 when needm
// had to supply one), SI = the g being manipulated, CX = TLS base.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	// needm is called with a zeroed word at 0(SP); DX is loaded from
	// that slot after the call.
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP	// 4 bytes for the saved PC plus the 12-byte frame
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP	// the PC stored at -4(DI) above
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)		// had an m on entry: skip the dropm call
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// On Windows it also points 0x14(FS) at m.tls of the new g's m (or
// clears it and returns early when the argument is nil).
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
// Stores its single argument into the TLS g slot.
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// abort raises INT $3 and, should execution continue, spins forever.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
spin:
	JMP	spin

// stackcheck aborts unless g->stack.lo < SP < g->stack.hi
// (both comparisons are unsigned and strict).
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = current g
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	below_hi			// stack.hi > SP: fine
	CALL	runtime·abort(SB)
below_hi:
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	above_lo			// SP > stack.lo: fine
	CALL	runtime·abort(SB)
above_lo:
	RET

// func cputicks() int64
// Returns the RDTSC counter (low word from AX, high word from DX).
// When the SSE2 flag is set, a fence is issued first: LFENCE if
// lfenceBeforeRdtsc is set, MFENCE otherwise.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	read_tsc			// no SSE2: no fence at all
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	use_mfence
	LFENCE
	JMP	read_tsc
use_mfence:
	MFENCE
read_tsc:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

// ldt0setup calls setldt(7, &m0.tls, 32).
TEXT ldt0setup<>(SB),NOSPLIT,$16-0
	// Ask for ldt entry 7, pointing at m0.tls.
	// Entry 1 would do on Linux, but OS X cannot go lower than 7.
	// The number is only a hint; setldt loads GS with whatever it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is not marked NOSPLIT.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// memhash hashes s bytes at p. With useAeshash set it tail-jumps into
// aeshashbody (AX=data, BX=length, DX=&ret); otherwise it defers to
// memhashFallback.
TEXT runtime·memhash(SB),NOSPLIT,$0-16
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·memhashFallback(SB)

// strhash hashes the string object at p. With useAeshash set it
// unpacks the data pointer and length and tail-jumps into aeshashbody;
// otherwise it defers to strhashFallback.
TEXT runtime·strhash(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	aeshashbody<>(SB)
fallback:
	JMP	runtime·strhashFallback(SB)

// aeshashbody is the shared AES hashing core, entered by JMP from
// memhash and strhash (so h+4(FP) refers to their seed argument).
// AX: data
// BX: length
// DX: address to put return value
// The input is handled in five length classes: 0, 1-15, 16, 17-32,
// 33-64 and 65+ bytes.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	// 16 bits of length
	PSHUFHW	$0, X0, X0	// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1	// keep an unscrambled copy of the seed
	PXOR	runtime·aeskeysched(SB), X0	// mix in the per-process seed
	AESENC	X0, X0	// scramble seed

	// Dispatch on length (unsigned compares).
	CMPL	BX, $16
	JB	len0to15
	JE	len16
	CMPL	BX, $32
	JBE	len17to32
	CMPL	BX, $64
	JBE	len33to64
	JMP	len65plus

len0to15:
	TESTL	BX, BX
	JE	len0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	nearpageend

	// A 16-byte load starting at the data pointer stays inside the
	// page, so load directly and mask off the bytes past the length.
	MOVOU	-16(AX), X1
	ADDL	BX, BX	// BX*2, then *8 below: 16-byte table stride
	PAND	masks<>(SB)(BX*8), X1

finish16:
	AESENC	X0, X1	// scramble input, xor in seed
	AESENC	X1, X1	// two further scrambling rounds
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

nearpageend:
	// address ends in 1111xxxx. It may sit right before a page
	// boundary, so load the 16 bytes that end at the last data byte
	// and move them down with pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX	// BX*2, then *8 below: 16-byte table stride
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	finish16

len0:
	// Empty input: the result is the seed scrambled once more.
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

len16:
	MOVOU	(AX), X1
	JMP	finish16

len17to32:
	// derive a second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// first and last 16 bytes of the input (they may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// three scrambling rounds
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// fold the two lanes together
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

len33to64:
	// derive three more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 and last 32 bytes of the input (they may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the four lanes together
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

len65plus:
	// derive three more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// begin with the final 64 bytes (may overlap the loop's last block)
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// one scrambling round with the seeds
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// loop count = (length-1) / 64
	DECL	BX
	SHRL	$6, BX

blockloop:
	// load the next 64-byte block and mix it into the state
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	blockloop

	// two closing scramble rounds
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the four lanes together
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// memhash32 hashes the 4 bytes at p with seed h: three AESENC rounds
// keyed from aeskeysched when useAeshash is set, memhash32Fallback
// otherwise.
TEXT runtime·memhash32(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
fallback:
	JMP	runtime·memhash32Fallback(SB)

// memhash64 hashes the 8 bytes at p with seed h: three AESENC rounds
// keyed from aeskeysched when useAeshash is set, memhash64Fallback
// otherwise.
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	fallback
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
fallback:
	JMP	runtime·memhash64Fallback(SB)

// simple mask to get rid of data in the high part of the register.
// masks<> is a table of 16 entries, 16 bytes each (256 bytes total).
// Entry i (at byte offset 16*i) has its low i bytes set to 0xff and the
// remaining bytes set to zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Layout: 16 entries of 16 bytes each; a 0xff selector byte makes pshufb
// write zero to that destination byte.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports whether the assembly-side tables are usable: it sets
// the one-byte result ret+0(FP) to 1 when the addresses of both masks<>
// and shifts<> have their low four bits clear, and to 0 otherwise.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX	// any low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// return0 loads zero into AX and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX	// result is returned in AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX
	MOVL	AX, moduledata_next(DX)	// link the new module after the current tail
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new module becomes the tail
	RET

// uint32tofloat64 converts the 32-bit value a+0(FP) to a float64 stored
// at ret+4(FP). The value is widened to 64 bits on the stack with a zero
// high word, loaded onto the x87 stack as a 64-bit integer, and stored
// back as a double.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)	// zero high word
	FMOVV	0(SP), F0
	FMOVDP	F0, ret+4(FP)
	RET

// float64touint32 converts the float64 a+0(FP) to a 32-bit value stored
// at ret+8(FP). The x87 control word is saved at 0(SP), replaced with
// runtime·controlWord64trunc for the conversion (presumably selecting
// truncation, going by the name -- TODO confirm), and restored afterwards.
// The conversion produces a 64-bit integer at 4(SP); only its low 32 bits
// are returned.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)
	FLDCW	0(SP)
	MOVL	4(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI.
// It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
	// Frame layout (28 bytes):
	//    0(SP) DI   (flush path only; first argument to wbBufFlush)
	//    4(SP) AX   (flush path only; second argument to wbBufFlush)
	//    8(SP) DX   (flush path only)
	//   12(SP) BP   (flush path only)
	//   16(SP) SI   (flush path only)
	//   20(SP) CX   (always saved)
	//   24(SP) BX   (always saved)

	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVL	CX, 20(SP)
	MOVL	BX, 24(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(BX)
	MOVL	g(BX), BX
	MOVL	g_m(BX), BX
	MOVL	m_p(BX), BX
	MOVL	(p_wbBuf+wbBuf_next)(BX), CX
	// Increment wbBuf.next position.
	// Each record is 8 bytes; LEAL is used so FLAGS are not touched here.
	LEAL	8(CX), CX
	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)
	// Record the write.
	// The MOVLs below do not modify FLAGS, so the CMPL result is still
	// valid at the JEQ.
	MOVL	AX, -8(CX)	// Record value
	MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
	MOVL	BX, -4(CX)	// Record *slot
	// Is the buffer full? (flags set in CMPL above)
	JEQ	flush
ret:
	MOVL	20(SP), CX
	MOVL	24(SP), BX
	// Do the write.
	MOVL	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	MOVL	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVL	AX, 4(SP)	// Also second argument to wbBufFlush
	// BX already saved
	// CX already saved
	MOVL	DX, 8(SP)
	MOVL	BP, 12(SP)
	MOVL	SI, 16(SP)
	// DI already saved

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	MOVL	0(SP), DI
	MOVL	4(SP), AX
	MOVL	8(SP), DX
	MOVL	12(SP), BP
	MOVL	16(SP), SI
	JMP	ret

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame.
// These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Register pairs spilled as (x, y) by the stubs below:
//   Index, SliceB, Slice3C:        AX, CX
//   SliceAlen, SliceAcap, Slice3B: CX, DX
//   Slice3Alen, Slice3Acap:        DX, BX
// Each "U" variant uses the same registers as its base stub and differs
// only in the handler it jumps to.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3CU(SB)

// Extended versions for 64-bit indexes.
// The high word of the index always arrives in SI and is stored at
// hi+0(FP); the (lo, y) pair uses the same registers as the matching
// 32-bit stub and is stored at lo+4(FP) and y+8(FP).
TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndex(SB)
TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndexU(SB)
TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlen(SB)
TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlenU(SB)
TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcap(SB)
TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcapU(SB)
TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceB(SB)
TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceBU(SB)
TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Alen(SB)
TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AlenU(SB)
TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Acap(SB)
TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AcapU(SB)
TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3B(SB)
TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3BU(SB)
TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3C(SB)
TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3CU(SB)

#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// runtime·tls_g is a 4-byte, pointer-free global initialized to 8.
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif