// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
//
// argc and argv are copied into the 8-byte frame at 0(SP)/4(SP),
// which is where runtime·rt0_go reads them after the JMP.
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)

// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	// Save the caller's frame pointer and BX, SI, DI;
	// they are popped again at the restore label.
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash argc/argv in file-local globals for _rt0_386_lib_go.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Two 4-byte outgoing argument slots; released at restore.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)	// first C argument: _rt0_386_lib_go
	MOVL	$0, 4(SP)	// second C argument: 0

	CALL	AX

	// Undo the alignment.
	MOVL	BP, SP

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET

// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)
	JMP	runtime·rt0_go(SB)

// argc/argv saved by _rt0_386_lib for _rt0_386_lib_go.
DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4

// rt0_go is reached by JMP with argc at 0(SP) and argv at 4(SP).
// It sets up g0's stack bounds, checks CPU features, sets up TLS
// (via _cgo_init when present, otherwise ldt0setup), wires up m0 and g0,
// runs the runtime initialization calls, queues runtime·main as a new
// goroutine and starts this M.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP	// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)	// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)	// 0x3d == 61, the declared size of bad_proc_msg
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

nocpuinfo:
	// if there is a _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
#ifdef GOOS_android
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+8).
	MOVL	-8(TLS), BX
	MOVL	BX, 12(SP)
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	$0, BX
	MOVL	BX, 12(SP)	// arg 4: not used when using platform's TLS
#ifdef GOOS_windows
	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
#else
	MOVL	BX, 8(SP)	// arg 3: not used when using platform's TLS
#endif
#endif
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// arg 2: setg_gcc
	MOVL	BP, 0(SP)	// arg 1: g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const_stackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_openbsd
	// skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
	JMP	ok
#endif
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	ldt0setup<>(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	CALL	runtime·newproc(SB)
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)
	RET

// Message written by rt0_go's bad_proc path; 61 bytes including the newline.
DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
GLOBL	bad_proc_msg<>(SB), RODATA, $61

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	CALL	runtime·mstart0(SB)
	RET	// not reached

/*
 *  go-routine
 */

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX	// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX	// make sure g != nil
	JMP	gogo<>(SB)

// gogo<> does not follow the Go ABI: it expects the gobuf pointer in BX
// and the g to install in DX, as set up by runtime·gogo.
TEXT gogo<>(SB), NOSPLIT, $0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument to fn: the g we switched away from
	MOVL	DI, DX	// DX = fn (closure context)
	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
	CALL	DI
	// fn returned, which it must never do: report via badmcall2.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	CMPL	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	CMPL	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3

// func switchToCrashStack0(fn func())
TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), AX

	get_tls(CX)
	MOVL	g(CX), BX	// BX = g
	MOVL	g_m(BX), DX	// DX = curm

	// set g to gcrash
	LEAL	runtime·gcrash(SB), BX	// g = &gcrash
	MOVL	DX, g_m(BX)	// g.m = curm
	MOVL	BX, m_g0(DX)	// curm.g0 = g
	get_tls(CX)
	MOVL	BX, g(CX)

	// switch to crashstack
	MOVL	(g_stack+stack_hi)(BX), DX
	SUBL	$(4*8), DX
	MOVL	DX, SP

	// call target function
	MOVL	AX, DX
	MOVL	0(AX), AX
	CALL	AX

	// should never return
	CALL	runtime·abort(SB)
	UNDEF

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), DI
	MOVL	g_m(DI), BX

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(DI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(DI)
	MOVL	DX, (g_sched+gobuf_ctxt)(DI)

	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack_noctxt clears DX (the closure context register that morestack
// saves into g->sched.ctxt) and then tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH tail-jumps to NAME when the frame size in CX is <= MAXSIZE
// (unsigned compare); otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE) \
	CMPL	CX, $MAXSIZE; \
	JA	3(PC); \
	MOVL	$NAME(SB), AX; \
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflectcall loads frameSize into CX and dispatches to the smallest
// runtime·callNNN whose fixed frame is large enough; sizes above
// 1073741824 end up in runtime·badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-28
	MOVL	frameSize+20(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper NAME with a MAXSIZE-byte frame.
// It copies stackArgsSize bytes of arguments from stackArgs to the frame,
// calls the code pointer stored in the first word of f (with f in DX),
// and then hands callRet<> the type, destination, source and size of the
// result area, each adjusted by stackRetOffset.
#define CALLFN(NAME,MAXSIZE) \
TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
	NO_LOCAL_POINTERS; \
	/* copy arguments to stack */ \
	MOVL	stackArgs+8(FP), SI; \
	MOVL	stackArgsSize+12(FP), CX; \
	MOVL	SP, DI; \
	REP;MOVSB; \
	/* call function */ \
	MOVL	f+4(FP), DX; \
	MOVL	(DX), AX; \
	PCDATA	$PCDATA_StackMapIndex, $0; \
	CALL	AX; \
	/* copy return values back */ \
	MOVL	stackArgsType+0(FP), DX; \
	MOVL	stackArgs+8(FP), DI; \
	MOVL	stackArgsSize+12(FP), CX; \
	MOVL	stackRetOffset+16(FP), BX; \
	MOVL	SP, SI; \
	ADDL	BX, DI; \
	ADDL	BX, SI; \
	SUBL	BX, CX; \
	CALL	callRet<>(SB); \
	RET

// callRet copies return values back at the end of call*.
// This is a separate function so it can allocate stack space for the
// arguments to reflectcallmove. It does not follow the Go ABI; it expects
// its arguments in registers:
//	DX: stackArgsType
//	DI: destination (stackArgs + stackRetOffset)
//	SI: source (frame + stackRetOffset)
//	CX: size (stackArgsSize - stackRetOffset)
TEXT callRet<>(SB), NOSPLIT, $20-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	MOVL	$0, 16(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// The argument size is declared as 4 bytes to match the 4-byte
// cycles+0(FP) load below.
// NOTE(review): a cycles value of 0 wraps around in the SUBL/JNZ loop
// instead of returning immediately; presumably callers never pass 0.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
619 TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0 620 PUSHL AX 621 PUSHL BX 622 get_tls(BX) 623 MOVL g(BX), BX 624 LEAL arg+0(FP), AX 625 MOVL AX, (g_sched+gobuf_sp)(BX) 626 MOVL $runtime·systemstack_switch(SB), AX 627 MOVL AX, (g_sched+gobuf_pc)(BX) 628 MOVL $0, (g_sched+gobuf_ret)(BX) 629 // Assert ctxt is zero. See func save. 630 MOVL (g_sched+gobuf_ctxt)(BX), AX 631 TESTL AX, AX 632 JZ 2(PC) 633 CALL runtime·abort(SB) 634 POPL BX 635 POPL AX 636 RET 637 638 // func asmcgocall_no_g(fn, arg unsafe.Pointer) 639 // Call fn(arg) aligned appropriately for the gcc ABI. 640 // Called on a system stack, and there may be no g yet (during needm). 641 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8 642 MOVL fn+0(FP), AX 643 MOVL arg+4(FP), BX 644 MOVL SP, DX 645 SUBL $32, SP 646 ANDL $~15, SP // alignment, perhaps unnecessary 647 MOVL DX, 8(SP) // save old SP 648 MOVL BX, 0(SP) // first argument in x86-32 ABI 649 CALL AX 650 MOVL 8(SP), DX 651 MOVL DX, SP 652 RET 653 654 // func asmcgocall(fn, arg unsafe.Pointer) int32 655 // Call fn(arg) on the scheduler stack, 656 // aligned appropriately for the gcc ABI. 657 // See cgocall.go for more details. 658 TEXT ·asmcgocall(SB),NOSPLIT,$0-12 659 MOVL fn+0(FP), AX 660 MOVL arg+4(FP), BX 661 662 MOVL SP, DX 663 664 // Figure out if we need to switch to m->g0 stack. 665 // We get called to create new OS threads too, and those 666 // come in on the m->g0 stack already. Or we might already 667 // be on the m->gsignal stack. 668 get_tls(CX) 669 MOVL g(CX), DI 670 CMPL DI, $0 671 JEQ nosave // Don't even have a G yet. 672 MOVL g_m(DI), BP 673 CMPL DI, m_gsignal(BP) 674 JEQ noswitch 675 MOVL m_g0(BP), SI 676 CMPL DI, SI 677 JEQ noswitch 678 CALL gosave_systemstack_switch<>(SB) 679 get_tls(CX) 680 MOVL SI, g(CX) 681 MOVL (g_sched+gobuf_sp)(SI), SP 682 683 noswitch: 684 // Now on a scheduling stack (a pthread-created stack). 
685 SUBL $32, SP 686 ANDL $~15, SP // alignment, perhaps unnecessary 687 MOVL DI, 8(SP) // save g 688 MOVL (g_stack+stack_hi)(DI), DI 689 SUBL DX, DI 690 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback) 691 MOVL BX, 0(SP) // first argument in x86-32 ABI 692 CALL AX 693 694 // Restore registers, g, stack pointer. 695 get_tls(CX) 696 MOVL 8(SP), DI 697 MOVL (g_stack+stack_hi)(DI), SI 698 SUBL 4(SP), SI 699 MOVL DI, g(CX) 700 MOVL SI, SP 701 702 MOVL AX, ret+8(FP) 703 RET 704 nosave: 705 // Now on a scheduling stack (a pthread-created stack). 706 SUBL $32, SP 707 ANDL $~15, SP // alignment, perhaps unnecessary 708 MOVL DX, 4(SP) // save original stack pointer 709 MOVL BX, 0(SP) // first argument in x86-32 ABI 710 CALL AX 711 712 MOVL 4(SP), CX // restore original stack pointer 713 MOVL CX, SP 714 MOVL AX, ret+8(FP) 715 RET 716 717 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) 718 // See cgocall.go for more details. 719 TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below 720 NO_LOCAL_POINTERS 721 722 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g. 723 // It is used to dropm while thread is exiting. 724 MOVL fn+0(FP), AX 725 CMPL AX, $0 726 JNE loadg 727 // Restore the g from frame. 728 get_tls(CX) 729 MOVL frame+4(FP), BX 730 MOVL BX, g(CX) 731 JMP dropm 732 733 loadg: 734 // If g is nil, Go did not create the current thread, 735 // or if this thread never called into Go on pthread platforms. 736 // Call needm to obtain one for temporary use. 737 // In this case, we're running on the thread stack, so there's 738 // lots of space, but the linker doesn't know. Hide the call from 739 // the linker analysis by using an indirect call through AX. 
740 get_tls(CX) 741 #ifdef GOOS_windows 742 MOVL $0, BP 743 CMPL CX, $0 744 JEQ 2(PC) // TODO 745 #endif 746 MOVL g(CX), BP 747 CMPL BP, $0 748 JEQ needm 749 MOVL g_m(BP), BP 750 MOVL BP, savedm-4(SP) // saved copy of oldm 751 JMP havem 752 needm: 753 MOVL $runtime·needAndBindM(SB), AX 754 CALL AX 755 MOVL $0, savedm-4(SP) 756 get_tls(CX) 757 MOVL g(CX), BP 758 MOVL g_m(BP), BP 759 760 // Set m->sched.sp = SP, so that if a panic happens 761 // during the function we are about to execute, it will 762 // have a valid SP to run on the g0 stack. 763 // The next few lines (after the havem label) 764 // will save this SP onto the stack and then write 765 // the same SP back to m->sched.sp. That seems redundant, 766 // but if an unrecovered panic happens, unwindm will 767 // restore the g->sched.sp from the stack location 768 // and then systemstack will try to use it. If we don't set it here, 769 // that restored SP will be uninitialized (typically 0) and 770 // will not be usable. 771 MOVL m_g0(BP), SI 772 MOVL SP, (g_sched+gobuf_sp)(SI) 773 774 havem: 775 // Now there's a valid m, and we're running on its m->g0. 776 // Save current m->g0->sched.sp on stack and then set it to SP. 777 // Save current sp in m->g0->sched.sp in preparation for 778 // switch back to m->curg stack. 779 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP). 780 MOVL m_g0(BP), SI 781 MOVL (g_sched+gobuf_sp)(SI), AX 782 MOVL AX, 0(SP) 783 MOVL SP, (g_sched+gobuf_sp)(SI) 784 785 // Switch to m->curg stack and call runtime.cgocallbackg. 786 // Because we are taking over the execution of m->curg 787 // but *not* resuming what had been running, we need to 788 // save that information (m->curg->sched) so we can restore it. 789 // We can restore m->curg->sched.sp easily, because calling 790 // runtime.cgocallbackg leaves SP unchanged upon return. 791 // To save m->curg->sched.pc, we push it onto the curg stack and 792 // open a frame the same size as cgocallback's g0 frame. 
793 // Once we switch to the curg stack, the pushed PC will appear 794 // to be the return PC of cgocallback, so that the traceback 795 // will seamlessly trace back into the earlier calls. 796 MOVL m_curg(BP), SI 797 MOVL SI, g(CX) 798 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI 799 MOVL (g_sched+gobuf_pc)(SI), BP 800 MOVL BP, -4(DI) // "push" return PC on the g stack 801 // Gather our arguments into registers. 802 MOVL fn+0(FP), AX 803 MOVL frame+4(FP), BX 804 MOVL ctxt+8(FP), CX 805 LEAL -(4+12)(DI), SP // Must match declared frame size 806 MOVL AX, 0(SP) 807 MOVL BX, 4(SP) 808 MOVL CX, 8(SP) 809 CALL runtime·cgocallbackg(SB) 810 811 // Restore g->sched (== m->curg->sched) from saved values. 812 get_tls(CX) 813 MOVL g(CX), SI 814 MOVL 12(SP), BP // Must match declared frame size 815 MOVL BP, (g_sched+gobuf_pc)(SI) 816 LEAL (12+4)(SP), DI // Must match declared frame size 817 MOVL DI, (g_sched+gobuf_sp)(SI) 818 819 // Switch back to m->g0's stack and restore m->g0->sched.sp. 820 // (Unlike m->curg, the g0 goroutine never uses sched.pc, 821 // so we do not have to restore it.) 822 MOVL g(CX), BP 823 MOVL g_m(BP), BP 824 MOVL m_g0(BP), SI 825 MOVL SI, g(CX) 826 MOVL (g_sched+gobuf_sp)(SI), SP 827 MOVL 0(SP), AX 828 MOVL AX, (g_sched+gobuf_sp)(SI) 829 830 // If the m on entry was nil, we called needm above to borrow an m, 831 // 1. for the duration of the call on non-pthread platforms, 832 // 2. or the duration of the C thread alive on pthread platforms. 833 // If the m on entry wasn't nil, 834 // 1. the thread might be a Go thread, 835 // 2. or it wasn't the first call from a C thread on pthread platforms, 836 // since then we skip dropm to reuse the m in the first call. 837 MOVL savedm-4(SP), DX 838 CMPL DX, $0 839 JNE droppedm 840 841 // Skip dropm to reuse it in the next call, when a pthread key has been created. 842 MOVL _cgo_pthread_key_created(SB), DX 843 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm. 
844 CMPL DX, $0 845 JEQ dropm 846 CMPL (DX), $0 847 JNE droppedm 848 849 dropm: 850 MOVL $runtime·dropm(SB), AX 851 CALL AX 852 droppedm: 853 854 // Done! 855 RET 856 857 // void setg(G*); set g. for use by needm. 858 TEXT runtime·setg(SB), NOSPLIT, $0-4 859 MOVL gg+0(FP), BX 860 #ifdef GOOS_windows 861 MOVL runtime·tls_g(SB), CX 862 CMPL BX, $0 863 JNE settls 864 MOVL $0, 0(CX)(FS) 865 RET 866 settls: 867 MOVL g_m(BX), AX 868 LEAL m_tls(AX), AX 869 MOVL AX, 0(CX)(FS) 870 #endif 871 get_tls(CX) 872 MOVL BX, g(CX) 873 RET 874 875 // void setg_gcc(G*); set g. for use by gcc 876 TEXT setg_gcc<>(SB), NOSPLIT, $0 877 get_tls(AX) 878 MOVL gg+0(FP), DX 879 MOVL DX, g(AX) 880 RET 881 882 TEXT runtime·abort(SB),NOSPLIT,$0-0 883 INT $3 884 loop: 885 JMP loop 886 887 // check that SP is in range [g->stack.lo, g->stack.hi) 888 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 889 get_tls(CX) 890 MOVL g(CX), AX 891 CMPL (g_stack+stack_hi)(AX), SP 892 JHI 2(PC) 893 CALL runtime·abort(SB) 894 CMPL SP, (g_stack+stack_lo)(AX) 895 JHI 2(PC) 896 CALL runtime·abort(SB) 897 RET 898 899 // func cputicks() int64 900 TEXT runtime·cputicks(SB),NOSPLIT,$0-8 901 // LFENCE/MFENCE instruction support is dependent on SSE2. 902 // When no SSE2 support is present do not enforce any serialization 903 // since using CPUID to serialize the instruction stream is 904 // very costly. 905 #ifdef GO386_softfloat 906 JMP rdtsc // no fence instructions available 907 #endif 908 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1 909 JNE fences 910 // Instruction stream serializing RDTSCP is supported. 911 // RDTSCP is supported by Intel Nehalem (2008) and 912 // AMD K8 Rev. F (2006) and newer. 913 RDTSCP 914 done: 915 MOVL AX, ret_lo+0(FP) 916 MOVL DX, ret_hi+4(FP) 917 RET 918 fences: 919 // MFENCE is instruction stream serializing and flushes the 920 // store buffers on AMD. The serialization semantics of LFENCE on AMD 921 // are dependent on MSR C001_1029 and CPU generation. 
922 // LFENCE on Intel does wait for all previous instructions to have executed. 923 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all 924 // previous instructions executed and all previous loads and stores to globally visible. 925 // Using MFENCE;LFENCE here aligns the serializing properties without 926 // runtime detection of CPU manufacturer. 927 MFENCE 928 LFENCE 929 rdtsc: 930 RDTSC 931 JMP done 932 933 TEXT ldt0setup<>(SB),NOSPLIT,$16-0 934 #ifdef GOOS_windows 935 CALL runtime·wintls(SB) 936 #endif 937 // set up ldt 7 to point at m0.tls 938 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go. 939 // the entry number is just a hint. setldt will set up GS with what it used. 940 MOVL $7, 0(SP) 941 LEAL runtime·m0+m_tls(SB), AX 942 MOVL AX, 4(SP) 943 MOVL $32, 8(SP) // sizeof(tls array) 944 CALL runtime·setldt(SB) 945 RET 946 947 TEXT runtime·emptyfunc(SB),0,$0-0 948 RET 949 950 // hash function using AES hardware instructions 951 TEXT runtime·memhash(SB),NOSPLIT,$0-16 952 CMPB runtime·useAeshash(SB), $0 953 JEQ noaes 954 MOVL p+0(FP), AX // ptr to data 955 MOVL s+8(FP), BX // size 956 LEAL ret+12(FP), DX 957 JMP aeshashbody<>(SB) 958 noaes: 959 JMP runtime·memhashFallback(SB) 960 961 TEXT runtime·strhash(SB),NOSPLIT,$0-12 962 CMPB runtime·useAeshash(SB), $0 963 JEQ noaes 964 MOVL p+0(FP), AX // ptr to string object 965 MOVL 4(AX), BX // length of string 966 MOVL (AX), AX // string data 967 LEAL ret+8(FP), DX 968 JMP aeshashbody<>(SB) 969 noaes: 970 JMP runtime·strhashFallback(SB) 971 972 // AX: data 973 // BX: length 974 // DX: address to put return value 975 TEXT aeshashbody<>(SB),NOSPLIT,$0-0 976 MOVL h+4(FP), X0 // 32 bits of per-table hash seed 977 PINSRW $4, BX, X0 // 16 bits of length 978 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times 979 MOVO X0, X1 // save unscrambled seed 980 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed 981 AESENC X0, X0 // scramble seed 982 983 CMPL BX, 
$16 984 JB aes0to15 985 JE aes16 986 CMPL BX, $32 987 JBE aes17to32 988 CMPL BX, $64 989 JBE aes33to64 990 JMP aes65plus 991 992 aes0to15: 993 TESTL BX, BX 994 JE aes0 995 996 ADDL $16, AX 997 TESTW $0xff0, AX 998 JE endofpage 999 1000 // 16 bytes loaded at this address won't cross 1001 // a page boundary, so we can load it directly. 1002 MOVOU -16(AX), X1 1003 ADDL BX, BX 1004 PAND masks<>(SB)(BX*8), X1 1005 1006 final1: 1007 PXOR X0, X1 // xor data with seed 1008 AESENC X1, X1 // scramble combo 3 times 1009 AESENC X1, X1 1010 AESENC X1, X1 1011 MOVL X1, (DX) 1012 RET 1013 1014 endofpage: 1015 // address ends in 1111xxxx. Might be up against 1016 // a page boundary, so load ending at last byte. 1017 // Then shift bytes down using pshufb. 1018 MOVOU -32(AX)(BX*1), X1 1019 ADDL BX, BX 1020 PSHUFB shifts<>(SB)(BX*8), X1 1021 JMP final1 1022 1023 aes0: 1024 // Return scrambled input seed 1025 AESENC X0, X0 1026 MOVL X0, (DX) 1027 RET 1028 1029 aes16: 1030 MOVOU (AX), X1 1031 JMP final1 1032 1033 aes17to32: 1034 // make second starting seed 1035 PXOR runtime·aeskeysched+16(SB), X1 1036 AESENC X1, X1 1037 1038 // load data to be hashed 1039 MOVOU (AX), X2 1040 MOVOU -16(AX)(BX*1), X3 1041 1042 // xor with seed 1043 PXOR X0, X2 1044 PXOR X1, X3 1045 1046 // scramble 3 times 1047 AESENC X2, X2 1048 AESENC X3, X3 1049 AESENC X2, X2 1050 AESENC X3, X3 1051 AESENC X2, X2 1052 AESENC X3, X3 1053 1054 // combine results 1055 PXOR X3, X2 1056 MOVL X2, (DX) 1057 RET 1058 1059 aes33to64: 1060 // make 3 more starting seeds 1061 MOVO X1, X2 1062 MOVO X1, X3 1063 PXOR runtime·aeskeysched+16(SB), X1 1064 PXOR runtime·aeskeysched+32(SB), X2 1065 PXOR runtime·aeskeysched+48(SB), X3 1066 AESENC X1, X1 1067 AESENC X2, X2 1068 AESENC X3, X3 1069 1070 MOVOU (AX), X4 1071 MOVOU 16(AX), X5 1072 MOVOU -32(AX)(BX*1), X6 1073 MOVOU -16(AX)(BX*1), X7 1074 1075 PXOR X0, X4 1076 PXOR X1, X5 1077 PXOR X2, X6 1078 PXOR X3, X7 1079 1080 AESENC X4, X4 1081 AESENC X5, X5 1082 AESENC X6, X6 1083 AESENC 
X7, X7 1084 1085 AESENC X4, X4 1086 AESENC X5, X5 1087 AESENC X6, X6 1088 AESENC X7, X7 1089 1090 AESENC X4, X4 1091 AESENC X5, X5 1092 AESENC X6, X6 1093 AESENC X7, X7 1094 1095 PXOR X6, X4 1096 PXOR X7, X5 1097 PXOR X5, X4 1098 MOVL X4, (DX) 1099 RET 1100 1101 aes65plus: 1102 // make 3 more starting seeds 1103 MOVO X1, X2 1104 MOVO X1, X3 1105 PXOR runtime·aeskeysched+16(SB), X1 1106 PXOR runtime·aeskeysched+32(SB), X2 1107 PXOR runtime·aeskeysched+48(SB), X3 1108 AESENC X1, X1 1109 AESENC X2, X2 1110 AESENC X3, X3 1111 1112 // start with last (possibly overlapping) block 1113 MOVOU -64(AX)(BX*1), X4 1114 MOVOU -48(AX)(BX*1), X5 1115 MOVOU -32(AX)(BX*1), X6 1116 MOVOU -16(AX)(BX*1), X7 1117 1118 // scramble state once 1119 AESENC X0, X4 1120 AESENC X1, X5 1121 AESENC X2, X6 1122 AESENC X3, X7 1123 1124 // compute number of remaining 64-byte blocks 1125 DECL BX 1126 SHRL $6, BX 1127 1128 aesloop: 1129 // scramble state, xor in a block 1130 MOVOU (AX), X0 1131 MOVOU 16(AX), X1 1132 MOVOU 32(AX), X2 1133 MOVOU 48(AX), X3 1134 AESENC X0, X4 1135 AESENC X1, X5 1136 AESENC X2, X6 1137 AESENC X3, X7 1138 1139 // scramble state 1140 AESENC X4, X4 1141 AESENC X5, X5 1142 AESENC X6, X6 1143 AESENC X7, X7 1144 1145 ADDL $64, AX 1146 DECL BX 1147 JNE aesloop 1148 1149 // 3 more scrambles to finish 1150 AESENC X4, X4 1151 AESENC X5, X5 1152 AESENC X6, X6 1153 AESENC X7, X7 1154 1155 AESENC X4, X4 1156 AESENC X5, X5 1157 AESENC X6, X6 1158 AESENC X7, X7 1159 1160 AESENC X4, X4 1161 AESENC X5, X5 1162 AESENC X6, X6 1163 AESENC X7, X7 1164 1165 PXOR X6, X4 1166 PXOR X7, X5 1167 PXOR X5, X4 1168 MOVL X4, (DX) 1169 RET 1170 1171 TEXT runtime·memhash32(SB),NOSPLIT,$0-12 1172 CMPB runtime·useAeshash(SB), $0 1173 JEQ noaes 1174 MOVL p+0(FP), AX // ptr to data 1175 MOVL h+4(FP), X0 // seed 1176 PINSRD $1, (AX), X0 // data 1177 AESENC runtime·aeskeysched+0(SB), X0 1178 AESENC runtime·aeskeysched+16(SB), X0 1179 AESENC runtime·aeskeysched+32(SB), X0 1180 MOVL X0, ret+8(FP) 1181 RET 1182 
noaes:
	// useAeshash is zero: tail-jump to the fallback routine.
	JMP	runtime·memhash32Fallback(SB)

// runtime·memhash64 hashes the 8 bytes at p, mixed with the seed h.
// Arguments: p+0(FP) is the data pointer, h+4(FP) is the seed.
// Result: ret+8(FP), taken from the low 32 bits of X0.
// When runtime·useAeshash is zero, control is transferred to
// runtime·memhash64Fallback instead.
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data: 8 bytes into the low half of X0
	PINSRD	$2, h+4(FP), X0	// seed: inserted as 32-bit lane 2
	// Three AES rounds, each using a different 16-byte slice of aeskeysched.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)	// result = low 32 bits of the state
	RET
noaes:
	// useAeshash is zero: tail-jump to the fallback routine.
	JMP	runtime·memhash64Fallback(SB)

// Simple masks to get rid of data in the high part of the register.
// The table holds 16 rows of 16 bytes. Row n (at byte offset 16*n) has
// its low n bytes set to 0xff and all remaining bytes zero.

// n = 0
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

// n = 1
DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

// n = 2
DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

// n = 3
DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

// n = 4
DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

// n = 5
DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

// n = 6
DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

// n = 7
DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

// n = 8
DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

// n = 9
DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

// n = 10
DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

// n = 11
DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

// n = 12
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

// n = 13
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

// n = 14
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

// n = 15
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// These are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// The index is how many bytes to move.
// The table holds 16 rows of 16 control bytes. For n = 1..15, row n (at
// byte offset 16*n) selects source bytes 16-n..15 into destination bytes
// 0..n-1 and fills the rest with 0xff. PSHUFB writes zero to any
// destination byte whose control byte has its high bit set.
// Row 0 is all zeros.

// n = 0
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

// n = 1
DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

// n = 2
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

// n = 3
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

// n = 4
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

// n = 5
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

// n = 6
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

// n = 7
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

// n = 8
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

// n = 9
DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

// n = 10
DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

// n = 11
DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

// n = 12
DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

// n = 13
DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

// n = 14
DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

// n = 15
DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// ·checkASM reports, in its one-byte result ret+0(FP), whether both
// masks<>(SB) and shifts<>(SB) are 16-byte aligned.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// Check that masks<>(SB) and shifts<>(SB) are 16-byte aligned.
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX		// a low bit set in either address survives the OR
	TESTL	$15, AX		// ZF is set iff the low 4 bits of both are clear
	SETEQ	ret+0(FP)
	RET

// runtime·return0 sets AX to zero and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is left in AX.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = g
	MOVL	g_m(AX), AX			// AX = g->m
	MOVL	m_curg(AX), AX			// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = g->m->curg.stack.hi
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The function body is a one-byte NOP, a CALL, and another one-byte NOP.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Add a module's moduledata to the linked list of moduledata objects.
// This is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)		// tail.next = new moduledata
	MOVL	AX, runtime·lastmoduledatap(SB)	// new moduledata becomes the tail
	RET

// runtime·uint32tofloat64 converts the 32-bit value a+0(FP) to a float64
// stored at ret+4(FP). The value is widened to 64 bits on the stack with a
// zero high word, loaded onto the x87 stack from that 8-byte slot, and
// stored back as a float64.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)	// low 32 bits
	MOVL	$0, 4(SP)	// high 32 bits = 0
	FMOVV	0(SP), F0
	FMOVDP	F0, ret+4(FP)
	RET

// runtime·float64touint32 converts the float64 at a+0(FP) to a 32-bit value
// stored at ret+8(FP). The x87 control word is saved at 0(SP), replaced by
// runtime·controlWord64trunc for the store, and restored afterwards.
// NOTE(review): controlWord64trunc presumably selects truncation (round
// toward zero); its value is defined outside this section -- confirm.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)				// save the current control word
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)			// store 8 bytes at 4(SP) and pop
	FLDCW	0(SP)				// restore the saved control word
	MOVL	4(SP), AX			// result = low 32 bits of the stored value
	MOVL	AX, ret+8(FP)
	RET

// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in DI, and returns a pointer
// to the buffer space in DI.
// It clobbers FLAGS. It does not clobber any general-purpose registers
// (other than DI, which carries the result),
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcWriteBarrier2(SB)
//     MOVL    AX, (DI)
//     MOVL    88(CX), DX
//     MOVL    DX, 4(DI)
// dowrite:
//     MOVL    AX, 88(CX)
//
// Frame layout (28 bytes):
//     0(SP) DI   4(SP) AX   8(SP) DX   12(SP) BP   16(SP) SI
//     20(SP) CX  24(SP) BX
// CX and BX are saved on every call; the others only on the flush path.
TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVL	CX, 20(SP)
	MOVL	BX, 24(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(BX)
	MOVL	g(BX), BX
	MOVL	g_m(BX), BX
	MOVL	m_p(BX), BX	// BX = g.m.p
	// Get current buffer write position.
	MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// original next position
	ADDL	DI, CX				// new next position
	// Is the buffer full?
	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)
	JA	flush	// unsigned: new next position is past the end
	// Commit to the larger buffer.
	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
	// Make return value (the original next position)
	SUBL	DI, CX
	MOVL	CX, DI
	// Restore registers.
	MOVL	20(SP), CX
	MOVL	24(SP), BX
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	MOVL	DI, 0(SP)
	MOVL	AX, 4(SP)
	// BX already saved
	// CX already saved
	MOVL	DX, 8(SP)
	MOVL	BP, 12(SP)
	MOVL	SI, 16(SP)
	// DI already saved

	CALL	runtime·wbBufFlush(SB)

	// Reload what was saved above (DI is the requested byte count again),
	// then redo the reservation from the top.
	MOVL	0(SP), DI
	MOVL	4(SP), AX
	MOVL	8(SP), DX
	MOVL	12(SP), BP
	MOVL	16(SP), SI
	JMP	retry

// runtime·gcWriteBarrierN loads DI with a byte count of 4*N and tail-jumps
// to gcWriteBarrier<>, which returns a pointer to the buffer space in DI.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$4, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$8, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$12, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$16, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$20, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$24, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$28, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$32, DI
	JMP	gcWriteBarrier<>(SB)

// Note: these functions use a special calling convention to save
// generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Each stub below stores two registers into x+0(FP) and y+4(FP) and jumps to
// runtime·goPanic<Name>. The register pair differs from stub to stub.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
	MOVL	CX, x+0(FP)
	MOVL	DX, y+4(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
	MOVL	AX, x+0(FP)
	MOVL	CX, y+4(FP)
	JMP	runtime·goPanicSlice3CU(SB)
TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
	MOVL	DX, x+0(FP)
	MOVL	BX, y+4(FP)
	JMP	runtime·goPanicSliceConvert(SB)

// Extended versions for 64-bit indexes.
// These take three 32-bit words: SI is always stored to hi+0(FP), followed by
// a per-stub register pair stored to lo+4(FP) and y+8(FP). Each stub then
// jumps to runtime·goPanicExtend<Name>.
TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndex(SB)
TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendIndexU(SB)
TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlen(SB)
TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAlenU(SB)
TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcap(SB)
TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSliceAcapU(SB)
TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceB(SB)
TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSliceBU(SB)
TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Alen(SB)
TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AlenU(SB)
TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3Acap(SB)
TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	DX, lo+4(FP)
	MOVL	BX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3AcapU(SB)
TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3B(SB)
TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	CX, lo+4(FP)
	MOVL	DX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3BU(SB)
TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3C(SB)
TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
	MOVL	SI, hi+0(FP)
	MOVL	AX, lo+4(FP)
	MOVL	CX, y+8(FP)
	JMP	runtime·goPanicExtendSlice3CU(SB)

// runtime·tls_g is a 4-byte, pointer-free global. It is declared here only
// for Android (with an initial value) and Windows (without one).
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// NOTE(review): the initial value 8 is presumably the byte offset of
// slot #2 with 4-byte slots -- confirm.
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif
#ifdef GOOS_windows
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif