// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go bootstraps the runtime: it saves argc/argv on an aligned
// scratch stack, gives g0 default stack bounds, records CPUID results,
// sets up TLS (via _cgo_init or ldt0setup), links g0 and m0, runs
// check/args/osinit/schedinit, queues runtime·main with newproc and
// finally calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// CPUID leaf 1: keep the feature words for later checks.
	MOVL	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)		// second argument: setg_gcc
	MOVL	BP, 0(SP)		// first argument: g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok
needtls:
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·tls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(CX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	// Trap if mstart ever comes back.
	INT	$3
	RET

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a breakpoint trap.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL	$0x27F			// control word goes through the stack
	FLDCW	0(SP)
	POPL	AX
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	// Register roles: DI = fn, AX = calling g, SI = m->g0.
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g we came from
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI
	// fn returned, which it must not do: report through badmcall2.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when already running on gsignal or g0; when running
// on m->curg it switches to g0's stack for the call and switches back
// afterwards. Any other g is reported through badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// Trap rather than fall into the switch path: AX no longer holds g
	// here, so the stores below would go through a bogus pointer.
	INT	$3

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack_noctxt clears the context register (DX) and then
// continues in morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call simply forwards to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and jumps to the smallest callNNN
// whose frame is large enough; sizes above 1073741824 go to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper: it copies argsize bytes of
// arguments onto its frame, calls f, copies the bytes from retoffset up
// to argsize back to the caller's buffer, then calls callwritebarrier.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX	// CMPXCHGL compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// The uintptr/uint variants below tail-jump to the 32-bit implementations.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
	JMP	runtime·cas(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicstore(SB)

// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-21
	MOVL	ptr+0(FP), BP
	MOVL	old_lo+4(FP), AX	// DX:AX = old
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX	// CX:BX = new
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	SETEQ	ret+20(FP)
	RET

// bool casp1(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX		// keep delta; XADDL leaves the old value in AX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX		// old + delta = new value
	MOVL	AX, ret+8(FP)
	RET

// xchg stores new into *ptr and returns the previous contents.
TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

// procyield executes PAUSE cycles times.
// The frame declares the 4-byte cycles argument that the body reads.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep1 stores val into *ptr using XCHGL.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore stores val into *ptr using XCHGL.
TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// uint64 atomicload64(uint64 volatile* addr);
TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX		// address must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX // crash with nil ptr deref
	LEAL	ret_lo+4(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET

// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX		// address must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX // crash with nil ptr deref
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// This is essentially a no-op, but it provides required memory fencing.
	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the caller's return address
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved across the call.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX	// word just below arg+0(FP): the return PC
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	// Register roles: AX = fn, BX = arg, DX = SP on entry,
	// SI = m->g0, DI = g on entry.
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	4(PC)		// already on g0: skip the three-instruction switch
	CALL	gosave<>(SB)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI	// stack.hi - saved depth = SP on entry
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->g0->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)		// fine when stack.hi > SP
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)		// fine when SP > stack.lo
	INT	$3
	RET

// getcallerpc returns the return PC stored just below argp; when that
// slot holds stackBarrierPC the value comes from nextBarrierPC instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp; when that
// slot holds stackBarrierPC the new PC is handed to setNextBarrierPC.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns argp unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at tls0
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT (flag 0).
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort traps with INT $3. The loop keeps execution from running on
// into the next function in the text segment if the trap is resumed.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
abort_loop:
	JMP	abort_loop

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, read from the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX	// memhash result
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), CX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

// aeshashstr hashes the string whose header p points to.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), CX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: address to put return value
// Entered by JMP from aeshash/aeshashstr, so h+4(FP) still names the
// seed argument of whichever function jumped here.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X6	// seed to low 64 bits of xmm6
	PINSRD	$2, CX, X6	// size to high 64 bits of xmm6
	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
	MOVO	runtime·aeskeysched(SB), X7
	CMPL	CX, $16
	JB	aes0to15
	JE	aes16
	CMPL	CX, $32
	JBE	aes17to32
	CMPL	CX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	CX, CX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X0
	ADDL	CX, CX		// CX*2, scaled by 8 below: 16-byte table entries
	PAND	masks<>(SB)(CX*8), X0

	// scramble 3 times
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

endofpage:
	// address ends in 1111xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X0
	ADDL	CX, CX		// CX*2, scaled by 8 below: 16-byte table entries
	PSHUFB	shifts<>(SB)(CX*8), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

aes0:
	// return input seed
	MOVL	h+4(FP), AX
	MOVL	AX, (DX)
	RET

aes16:
	MOVOU	(AX), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

aes17to32:
	// load data to be hashed
	MOVOU	(AX), X0
	MOVOU	-16(AX)(CX*1), X1

	// scramble 3 times
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X0
	AESENC	X7, X1

	// combine results
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes33to64:
	// first 32 and last 32 bytes (the two halves may overlap)
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes65plus:
	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(CX*1), X0
	MOVOU	-48(AX)(CX*1), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	// scramble state once
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3

	// compute number of remaining 64-byte blocks
	DECL	CX
	SHRL	$6, CX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	AESENC	X4, X0
	AESENC	X5, X1
	MOVOU	32(AX), X4
	MOVOU	48(AX), X5
	AESENC	X4, X2
	AESENC	X5, X3

	// scramble state
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	ADDL	$64, AX
	DECL	CX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> holds 16 entries of 16 bytes each. Entry i (at byte offset
// i*16) has its low i bytes set to 0xff and the remaining bytes zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb.  They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Layout: 16 entries of 16 bytes. For entry i >= 1 the low i control
// bytes select source bytes 16-i..15 and the rest are 0xff (a control
// byte with its high bit set makes PSHUFB write zero to that byte).
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// memeq compares size bytes at a and b.
// Frame: a+0(FP), b+4(FP) addresses, size+8(FP) byte count,
// ret+12(FP) one-byte result (1 if equal, 0 otherwise).
// It only marshals arguments into the registers memeqbody expects
// and tail-jumps there; memeqbody writes the result and returns.
TEXT runtime·memeq(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// The byte count is not an argument: it is read from offset 4 of the
// closure addressed by DX. Identical pointers short-circuit to true
// without reading the size.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// Frame: s1str+0(FP)/s1len+4(FP), s2str+8(FP), v+16(FP) one-byte
// result. Only s1's length is read; identical data pointers
// short-circuit to true.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	LEAL	v+16(FP), AX
	JMP	runtime·memeqbody(SB)
same:
	MOVB	$1, v+16(FP)
	RET

// bytes·Equal reports whether slices a and b hold the same bytes.
// Frame: a at 0 (len at 4), b at 12 (len at 16), ret+24(FP) one-byte
// result. Differing lengths return 0 without touching the data;
// otherwise the comparison is delegated to memeqbody with a's length.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// a in SI
// b in DI
// count in BX
// address of result byte in AX
//
// memeqbody stores 1 at (AX) if the BX bytes at SI and DI are equal
// and 0 otherwise, then returns to the original caller (it is reached
// by JMP, not CALL). Clobbers SI, DI, BX, CX, DX, X0-X7 and flags.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// PCMPEQB yields 0xff for each equal byte; AND the four
	// results together so one mask covers all 64 bytes.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set <=> all 64 bytes equal
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the last 4 bytes of the buffers. This may re-read bytes
	// that were already compared; the loads stay inside the buffers
	// because this path is only reached when the original count was >= 4.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// Fewer than 4 bytes. A 4-byte load is used and the bytes beyond
	// the count are shifted away before testing.
small:
	CMPL	BX, $0
	JEQ	equal		// ZF is set here, so SETEQ below stores 1

	LEAL	0(BX*8), CX
	NEGL	CX		// shift count: low 5 bits are 32 - 8*BX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx.  Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	SUBL	SI, DI
	SHLL	CX, DI		// discard the bytes beyond the count; ZF set iff the rest match
equal:
	SETEQ	(AX)
	RET

// cmpstring compares two strings.
// Frame: s1_base+0(FP)/s1_len+4(FP), s2_base+8(FP)/s2_len+12(FP),
// ret+16(FP) result word. It marshals the arguments into the
// registers cmpbody expects and tail-jumps there.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare compares two byte slices.
// Frame: first slice at 0 (length at 4), second slice at 12 (length
// at 16), ret+24(FP) result word. It marshals the arguments into the
// registers cmpbody expects and tail-jumps there.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte returns the index of the first byte equal to c in
// the slice s, or -1 if there is none.
// Frame: s at 0 (length at 4), c+12(FP), ret+16(FP).
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan at most CX bytes at DI for AL
	// With CX == 0 nothing is scanned and ZF is left as it was on
	// entry; both paths below then produce -1 because DI == SI.
	JZ 3(PC)		// match: skip the two "not found" instructions
	MOVL	$-1, ret+16(FP)
	RET
	SUBL	SI, DI		// DI stopped one past the match,
	SUBL	$1, DI		// so index = DI - SI - 1
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte returns the index of the first byte equal to c in
// the string s, or -1 if there is none.
// Frame: s at 0 (length at 4), c+8(FP), ret+12(FP).
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan at most CX bytes at DI for AL
	// With CX == 0 nothing is scanned and ZF is left as it was on
	// entry; both paths below then produce -1 because DI == SI.
	JZ 3(PC)		// match: skip the two "not found" instructions
	MOVL	$-1, ret+12(FP)
	RET
	SUBL	SI, DI		// DI stopped one past the match,
	SUBL	$1, DI		// so index = DI - SI - 1
	MOVL	DI, ret+12(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
//
// cmpbody is reached by JMP and returns to the original caller.
// It compares the first min(alen, blen) bytes as unsigned bytes; if
// they all match, the result is decided by the lengths (allsame).
// After the prologue DX holds blen-alen and BP the common length.
// Clobbers SI, DI, BX, CX, DX, BP, X0, X1 and flags.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX // DX = blen-alen
	CMOVLGT	BX, BP // BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame		// same address: only the lengths can differ
	CMPL	BP, $4
	JB	small
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	mediumloop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX	// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX		// unsigned: 1 if a's byte is above b's
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// At most 4 common bytes remain: compare the last 4 bytes of the
	// common prefix (may overlap bytes already found equal).
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

	// BX and CX hold differing 4-byte words from a and b. After the
	// byte swap the earliest byte in memory is the most significant,
	// so the highest differing bit decides the order.
diff4:
	BSWAPL	BX	// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, BX	// move a's bit to bottom
	ANDL	$1, BX	// mask bit
	LEAL	-1(BX*2), BX // 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX
	NEGL	CX		// shift count: low 5 bits are 32 - 8*BP
	JEQ	allsame		// BP == 0: nothing in common to compare

	// load si
	// A low address byte above 0xfc means a forward 4-byte load might
	// run past the data into the next page, so load backwards from
	// the end of the data instead and shift the bytes into place.
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI		// move the BP wanted bytes to the top, dropping the rest

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	allsame
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX		// DX = blen-alen
	SETLT	BX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand1 advances the per-m generator state m.fastrand and returns
// the new value in ret+0(FP).
// Step: x = 2*x; if the top bit of the doubled value is set,
// x ^= 0x88888eef.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX		// AX = g.m
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX			// x += x
	MOVL	DX, BX			// BX = doubled value without the XOR
	XORL	$0x88888eef, DX
	// The constant has its top bit set, so the XOR result is negative
	// exactly when the doubled value had its top bit clear; in that
	// case take the plain doubled value back from BX.
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is returned in AX; only AX and CX are written.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	// Presumably this leading one-byte NOP is what the "+PCQuantum"
	// return address skips over -- confirm PCQuantum for this
	// architecture.
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	// (the trailing NOP keeps the CALL's return address inside this
	// function; there is deliberately no RET)
	BYTE	$0x90	// NOP

// prefetcht0 issues a PREFETCHT0 hint for the address in addr+0(FP).
// It has no result; only AX is written.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues a PREFETCHT1 hint for the address in addr+0(FP).
// It has no result; only AX is written.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


// prefetcht2 issues a PREFETCHT2 hint for the address in addr+0(FP).
// It has no result; only AX is written.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues a PREFETCHNTA hint for the address in addr+0(FP).
// It has no result; only AX is written.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET