// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Provenance: github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/runtime/asm_386.s

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap routine. In order, it:
//   - saves argc/argv on a 16-byte aligned scratch area,
//   - sets default stack bounds on g0,
//   - probes CPUID and records the feature words,
//   - sets up TLS (through _cgo_init when present, else ldt0setup),
//   - links g0 and m0 together,
//   - calls check, args, osinit and schedinit,
//   - queues runtime·main through newproc and calls mstart.
// The trailing INT $3 is only reached if mstart returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547		// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69		// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E		// "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	MOVL	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS. if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)		// second argument: setg_gcc
	MOVL	BP, 0(SP)		// first argument: g0 (BP still holds $runtime·g0)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok
needtls:
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·tls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0			// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(CX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0			// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// mainPC is a 4-byte read-only word holding the address of runtime·main;
// rt0_go passes its address to newproc as the entry argument.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a breakpoint trap (INT $3) and returns if resumed.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL	$0x27F			// control word value, loaded from the stack slot
	FLDCW	0(SP)
	POPL	AX			// discard the temporary slot
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX		// save state in g->sched
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX		// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX			// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)		// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX			// argument to fn: the g we switched away from
	MOVL	DI, DX			// DX = closure context
	MOVL	0(DI), DI		// DI = code pointer stored in the func value
	CALL	DI
	POPL	AX
	// fn returned, which it must never do.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when the current g is gsignal or g0. When the
// current g is m->curg, it saves state in g->sched, switches to g0's
// stack, calls fn, and switches back. Any other g is reported through
// badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI		// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX		// AX = g
	MOVL	g_m(AX), BX		// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX		// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI		// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX		// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX		// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX		// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX		// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003		// crash if newstack returns
	RET

// morestack_noctxt clears DX (the context register saved by morestack
// into g->sched.ctxt) and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflect·call jumps straight to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and jumps to the first callNNN
// routine whose frame size is >= argsize (unsigned compare). Sizes
// above 1073741824 go to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine. It copies argsize bytes
// from argptr into its own frame, calls the function in f, copies the
// bytes from retoffset onward back to argptr, and then calls
// callwritebarrier(argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX		// CMPXCHGL compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// casuintptr jumps to runtime·cas.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
	JMP	runtime·cas(SB)

// atomicloaduintptr jumps to runtime·atomicload.
TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

// atomicloaduint jumps to runtime·atomicload.
TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

// atomicstoreuintptr jumps to runtime·atomicstore.
TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicstore(SB)

// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-21
	MOVL	ptr+0(FP), BP
	MOVL	old_lo+4(FP), AX	// DX:AX = old, as CMPXCHG8B expects
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX	// CX:BX = new
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	SETEQ	ret+20(FP)
	RET

// bool casp(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX			// keep delta; XADDL leaves the old value in AX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX			// old + delta = new value
	MOVL	AX, ret+8(FP)
	RET

// xchg stores new into *ptr with XCHGL and returns the previous contents.
TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

// xchguintptr jumps to runtime·xchg.
TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

// procyield executes PAUSE `cycles` times.
// The argument size is declared as 4 bytes to match the cycles+0(FP) read.
// NOTE: the counter is decremented before it is tested, so cycles == 0
// wraps around instead of returning immediately.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep1 stores val to *ptr using XCHGL.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore stores val to *ptr using XCHGL.
TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// uint64 atomicload64(uint64 volatile* addr);
TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX			// addr must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX			// crash with nil ptr deref
	LEAL	ret_lo+4(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET

// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX			// addr must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX			// crash with nil ptr deref
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// This is essentially a no-op, but it provides required memory fencing.
	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET

// void runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX		// fn
	MOVL	argp+4(FP), BX		// caller sp
	LEAL	-4(BX), SP		// caller sp after CALL
	SUBL	$5, (SP)		// return to CALL again
	MOVL	0(DX), BX
	JMP	BX			// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are pushed on entry and popped before returning.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX		// return PC stored just below the caller's SP
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	4(PC)
	CALL	gosave<>(SB)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP		// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)		// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)		// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)		// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)			// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX			// saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the return PC stored just below argp. If that
// slot holds stackBarrierPC, the original PC is fetched through
// nextBarrierPC instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp with pc.
// If that slot holds stackBarrierPC, the new PC is handed to
// setNextBarrierPC instead.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns argp unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Issues LFENCE or MFENCE (chosen by lfenceBeforeRdtsc) before RDTSC
// when the SSE2 bit is set in cpuid_edx; otherwise reads RDTSC unfenced.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE $0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE $0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at tls0
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint. setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)		// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is not marked NOSPLIT; rt0_go calls it
// "to fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort raises a breakpoint trap. The self-loop stops execution from
// falling through into the next function if the trap is resumed.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
loop:
	JMP	loop

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX		// size, read at offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX		// ptr to data
	MOVL	s+8(FP), CX		// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to string object
	MOVL	4(AX), CX		// length of string
	MOVL	(AX), AX		// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: address to put return value
// The seed is read from h+4(FP), i.e. the frame of the routine that
// jumped here (aeshash or aeshashstr).
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0		// 32 bits of per-table hash seed
	PINSRW	$4, CX, X0		// 16 bits of length
	PSHUFHW	$0, X0, X0		// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1			// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0			// scramble seed

	CMPL	CX, $16
	JB	aes0to15
	JE	aes16
	CMPL	CX, $32
	JBE	aes17to32
	CMPL	CX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	CX, CX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	CX, CX
	PAND	masks<>(SB)(CX*8), X1

final1:
	AESENC	X0, X1			// scramble input, xor in seed
	AESENC	X1, X1			// scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDL	CX, CX
	PSHUFB	shifts<>(SB)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(CX*1), X4
	MOVOU	-48(AX)(CX*1), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	CX
	SHRL	$6, CX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	CX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h using three AESENC
// rounds keyed from aeskeysched.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to data
	MOVL	h+4(FP), X0		// seed
	PINSRD	$1, (AX), X0		// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h using three AESENC
// rounds keyed from aeskeysched.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to data
	MOVQ	(AX), X0		// data
	PINSRD	$2, h+4(FP), X0		// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// Entry i (the 16 bytes at offset 16*i, i = 0..15) has its low i bytes
// set to 0xff and every higher byte zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

// 16 entries of 16 bytes each, read-only.
GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb.  They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// For entry i (i = 1..15) the first i control bytes are 16-i .. 15 and the
// rest are 0xff; pshufb zeroes a destination byte whose control byte has
// its top bit set. Entry 0 is all zero.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

// 16 entries of 16 bytes each, read-only.
GLOBL shifts<>(SB),RODATA,$256

// memeq reports whether the size bytes at a and at b are identical.
// Frame layout: a+0(FP), b+4(FP), size+8(FP), one-byte result at ret+12(FP).
// It loads the registers memeqbody expects and tail-jumps to it; memeqbody
// writes the result byte through AX and returns to memeq's caller.
TEXT runtime·memeq(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// Identical pointers compare equal without touching memory; otherwise the
// byte count is read from offset 4 of the closure addressed by DX and the
// comparison is delegated to memeqbody.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// Only s1's length is read; equal data pointers short-circuit to true.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	LEAL	v+16(FP), AX
	JMP	runtime·memeqbody(SB)
same:
	MOVB	$1, v+16(FP)
	RET

// bytes·Equal compares two byte slices.
// Frame layout: a at +0 (pointer) and +4 (length), b at +12 (pointer) and
// +16 (length), one-byte result at ret+24(FP).
// Slices of different length are unequal without inspecting their contents;
// otherwise the comparison is delegated to memeqbody.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// a in SI
// b in DI
// count in BX
// address of result byte in AX
//
// Stores 1 through AX when the count bytes at SI and DI are identical and
// 0 otherwise. Reached by JMP from the wrappers above, so its RET returns
// to their caller. Uses CX, DX and (on the SSE2 path) X0-X7 as scratch and
// modifies SI, DI and BX.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// byte-wise equality of each 16-byte pair (0xff where equal),
	// AND-ed together into X0
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	// all 16 mask bits set means every byte of the 64 matched
	CMPL	DX, $0xffff
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compares the 4 bytes that end at the end of each buffer. This path is
	// only reached when the original count was at least 4, so the load may
	// overlap bytes that were already compared but stays inside the buffers.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// count is 0-3
small:
	// For a zero count the JEQ lands on SETEQ with the equal flag from
	// this compare still set, so the result is 1.
	CMPL	BX, $0
	JEQ	equal

	// CX = -8*count. Shifts use only the low 5 bits of CL, so this acts as
	// a shift by 32-8*count.
	LEAL	0(BX*8), CX
	NEGL	CX

	// Test the low byte of the address (through DX); the branch is taken
	// for low bytes 0xfd-0xff.
	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// The difference of the two words is shifted left so that only the low
	// count bytes remain; the result is zero exactly when those bytes match.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	(AX)
	RET

// cmpstring compares two strings and stores the 1/0/-1 result at ret+16(FP).
// It loads the registers cmpbody expects and tail-jumps to it.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare compares two byte slices and stores the 1/0/-1 result at
// ret+24(FP). The words at +0/+12 go to SI/DI (data pointers for cmpbody)
// and the words at +4/+16 go to BX/DX (lengths for cmpbody).
// NOTE(review): the s1/s2 operand names differ from the a/a_len/b/b_len
// naming used by bytes·Equal for the same offsets; confirm against the Go
// declaration before renaming.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte scans the s_len bytes at s for the byte c.
// Frame layout: s+0(FP), s_len+4(FP), c+12(FP), result at ret+16(FP).
// The result is the index of the first match, or -1.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward from DI for AL, at most CX bytes
	JZ 3(PC)	// match found: skip the "return -1" pair below
	MOVL	$-1, ret+16(FP)
	RET
	// DI has advanced one past the matching byte, so index = DI - SI - 1
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte is the string counterpart of bytes·IndexByte.
// Frame layout: s+0(FP), s_len+4(FP), c+8(FP), result at ret+12(FP).
// The result is the index of the first match, or -1.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward from DI for AL, at most CX bytes
	JZ 3(PC)	// match found: skip the "return -1" pair below
	MOVL	$-1, ret+12(FP)
	RET
	// DI has advanced one past the matching byte, so index = DI - SI - 1
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
//
// cmpbody compares the first min(alen, blen) bytes of a and b as unsigned
// bytes in memory order. The first differing byte decides the result
// (+1 if a's byte is larger, -1 if smaller); if the common prefix is
// identical the lengths decide (+1 if alen > blen, 0 if equal, -1 if
// alen < blen). It is reached by JMP from its wrappers, so its RET returns
// to their caller.
//
// No CMOVcc instructions are used: they exist only on processors that
// report the CMOV feature bit, and nothing here tests for it. The SSE2 path
// is explicitly gated on runtime·cpuid_edx for the same reason.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP		// BP = blen, provisional minimum
	SUBL	BX, DX		// DX = blen-alen
	JLE	havemin		// blen <= alen: BP is already the minimum
	MOVL	BX, BP		// blen > alen: BP = alen
havemin:			// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame		// same address: only the lengths can differ
	CMPL	BP, $4
	JB	small
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	mediumloop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16		// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX		// index of first byte that differs
	XORL	DX, DX		// length difference is no longer needed
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX		// 1 if a's byte is larger (unsigned)
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// 0-4 bytes left; compare the 4 bytes that end at the end of the common
	// prefix. This path is only reached when the prefix was at least 4
	// bytes long, so the load may overlap bytes that were already compared.
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX		// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX		// find bit differences
	BSRL	CX, CX		// index of highest bit difference
	SHRL	CX, BX		// move a's bit to bottom
	ANDL	$1, BX		// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	// CX = -8*BP. Shifts use only the low 5 bits of CL, so this acts as a
	// shift by 32-8*BP. NEGL leaves the zero flag set when BP is 0.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	allsame

	// load si
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// low address byte is 0xfd-0xff: load the 4 bytes that end at the end
	// of the data and shift the wanted bytes down
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI		// keep only the BP wanted bytes, in the high bytes

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI		// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI		// find bit differences
	JEQ	allsame
	BSRL	DI, CX		// index of highest bit difference
	SHRL	CX, SI		// move a's bit to bottom
	ANDL	$1, SI		// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX		// DX = blen-alen
	SETLT	BX		// 1 if alen > blen
	SETEQ	CX		// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand1 advances the per-m generator state m_fastrand and returns the
// new value at ret+0(FP).
// Step: x += x; if the doubled value is negative, x ^= 0x88888eef.
// The constant has bit 31 set, so the XOR result is non-negative exactly
// when the doubled value was negative. The sign flag after the XOR
// therefore selects between the XOR-ed value and the saved doubled value.
// A branch is used instead of CMOVcc so that no CMOV support is required.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX		// AX = g.m
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX			// x += x
	MOVL	DX, BX			// BX = doubled value
	XORL	$0x88888eef, DX
	JPL	keepxor			// doubled value was negative: keep the XOR
	MOVL	BX, DX			// otherwise use the doubled value unchanged
keepxor:
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The value is returned in AX; only AX and CX are modified.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90			// literal one-byte NOP ahead of the call
	CALL	runtime·goexit1(SB)	// goexit1 never returns here
	// The byte after the CALL keeps goexit1's return address inside
	// goexit's code range, which traceback relies on.
	BYTE	$0x90			// literal one-byte NOP

// prefetcht0(addr): issue a PREFETCHT0 hint for the address in addr+0(FP).
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), CX
	PREFETCHT0	(CX)
	RET

// prefetcht1(addr): issue a PREFETCHT1 hint for the address in addr+0(FP).
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), CX
	PREFETCHT1	(CX)
	RET

// prefetcht2(addr): issue a PREFETCHT2 hint for the address in addr+0(FP).
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), CX
	PREFETCHT2	(CX)
	RET

// prefetchnta(addr): issue a PREFETCHNTA hint for the address in addr+0(FP).
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), CX
	PREFETCHNTA	(CX)
	RET