// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap. It saves argc/argv on an aligned stack,
// sets default stack bounds on g0, records CPUID information, sets up TLS
// (through _cgo_init when present, otherwise through ldt0setup), links
// m0 and g0, runs args/osinit/schedinit, queues runtime·main with newproc
// and finally calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547		// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69		// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E		// "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	MOVL	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS. if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok
needtls:
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·tls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0			// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(CX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0			// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a debugger trap (INT $3) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL	$0x27F
	FLDCW	0(SP)
	POPL	AX
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX		// save state in g->sched
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX		// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX			// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)		// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when already running on gsignal or g0; when running on
// m->curg it switches to g0's stack for the call and switches back afterwards.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI		// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX		// AX = g
	MOVL	g_m(AX), BX		// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX		// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI		// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX		// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX		// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX		// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX		// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003		// crash if newstack returns
	RET

// morestack_noctxt clears DX (stored by morestack as gobuf_ctxt) and
// continues in morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflect·call is an alias that jumps straight to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and jumps to the smallest
// runtime·callNNN whose frame size is >= argsize; sizes above
// 1073741824 go to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// The uintptr/uint variants below tail-jump to the 32-bit implementations.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
	JMP	runtime·cas(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicstore(SB)

// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-21
	MOVL	ptr+0(FP), BP
	MOVL	old_lo+4(FP), AX
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	SETEQ	ret+20(FP)
	RET

// bool casp1(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX			// keep delta; XADDL leaves the old *val in AX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX			// old value + delta = new value
	MOVL	AX, ret+8(FP)
	RET

// xchg stores new into *ptr with XCHGL and returns the previous contents.
TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

// procyield executes PAUSE in a loop, decrementing the cycles argument
// until it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep1 stores val into *ptr using XCHGL.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore stores val into *ptr using XCHGL.
TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// uint64 atomicload64(uint64 volatile* addr);
TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX			// addr must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX			// crash with nil ptr deref
	LEAL	ret_lo+4(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET

// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX			// addr must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX			// crash with nil ptr deref
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// This is essentially a no-op, but it provides required memory fencing.
	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET

// void runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX		// fn
	MOVL	argp+4(FP), BX		// caller sp
	LEAL	-4(BX), SP		// caller sp after CALL
	SUBL	$5, (SP)		// return to CALL again
	MOVL	0(DX), BX
	JMP	BX			// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved across the call.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	4(PC)
	CALL	gosave<>(SB)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP		// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)		// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)		// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)		// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)			// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX			// saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP lies strictly between g->stack.lo and g->stack.hi
// (g->stack.lo < SP < g->stack.hi); trap with INT $3 otherwise.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the return PC stored just below argp. If that slot
// holds stackBarrierPC, the original PC is fetched via nextBarrierPC.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp. If that slot
// holds stackBarrierPC, the new PC is handed to setNextBarrierPC instead.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns its argp argument unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Reads the time-stamp counter. When SSE2 is available an LFENCE or MFENCE
// (selected by lfenceBeforeRdtsc) is issued before RDTSC.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at tls0
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint. setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)		// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is not NOSPLIT; rt0_go calls it so that a
// wrong stack check faults early.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort raises a breakpoint trap and never returns.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
	// If the trap returns, spin here rather than fall through into
	// whatever code follows this function.
loop:
	JMP	loop

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX		// size, read from the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX		// ptr to data
	MOVL	s+8(FP), CX		// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to string object
	MOVL	4(AX), CX		// length of string
	MOVL	(AX), AX		// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X6		// seed to low 64 bits of xmm6
	PINSRD	$2, CX, X6		// size to high 64 bits of xmm6
	PSHUFHW	$0, X6, X6		// replace size with its low 2 bytes repeated 4 times
	MOVO	runtime·aeskeysched(SB), X7
	CMPL	CX, $16
	JB	aes0to15
	JE	aes16
	CMPL	CX, $32
	JBE	aes17to32
	CMPL	CX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	CX, CX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X0
	ADDL	CX, CX			// CX*2, scaled by 8 below: 16-byte table entries
	PAND	masks<>(SB)(CX*8), X0

	// scramble 3 times
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

endofpage:
	// low 12 bits of the address are 1111 1111 xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X0
	ADDL	CX, CX			// CX*2, scaled by 8 below: 16-byte table entries
	PSHUFB	shifts<>(SB)(CX*8), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

aes0:
	// Return scrambled input seed
	AESENC	X7, X6
	AESENC	X7, X6
	MOVL	X6, (DX)
	RET

aes16:
	MOVOU	(AX), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET


aes17to32:
	// load data to be hashed
	MOVOU	(AX), X0
	MOVOU	-16(AX)(CX*1), X1

	// scramble 3 times
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X0
	AESENC	X7, X1

	// combine results
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes33to64:
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes65plus:
	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(CX*1), X0
	MOVOU	-48(AX)(CX*1), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	// scramble state once
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3

	// compute number of remaining 64-byte blocks
	DECL	CX
	SHRL	$6, CX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	AESENC	X4, X0
	AESENC	X5, X1
	MOVOU	32(AX), X4
	MOVOU	48(AX), X5
	AESENC	X4, X2
	AESENC	X5, X3

	// scramble state
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	ADDL	$64, AX
	DECL	CX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h using three AESENC rounds
// keyed from aeskeysched.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to data
	MOVL	h+4(FP), X0		// seed
	PINSRD	$1, (AX), X0		// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h using three AESENC rounds
// keyed from aeskeysched.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to data
	MOVQ	(AX), X0		// data
	PINSRD	$2, h+4(FP), X0		// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> is a table of sixteen 16-byte rows. Row i (byte offset 16*i)
// has its low i bytes set to 0xff and every other byte zero.

// row 0: no bytes set
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

// row 1: low 1 byte set
DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

// row 2: low 2 bytes set
DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

// row 3: low 3 bytes set
DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

// row 4: low 4 bytes set
DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

// row 5: low 5 bytes set
DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

// row 6: low 6 bytes set
DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

// row 7: low 7 bytes set
DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

// row 8: low 8 bytes set
DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

// row 9: low 9 bytes set
DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

// row 10: low 10 bytes set
DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

// row 11: low 11 bytes set
DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

// row 12: low 12 bytes set
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

// row 13: low 13 bytes set
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

// row 14: low 14 bytes set
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

// row 15: low 15 bytes set
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
//
// Row i (byte offset 16*i) starts with i control bytes that select source
// bytes 16-i through 15; every remaining control byte is 0xff, which PSHUFB
// turns into a zero byte in the result.
// Row 0 is all zeros; presumably the zero-length case never feeds it to
// PSHUFB (TODO confirm against the users of this table).

// row 0
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

// row 1: source byte 15 -> byte 0
DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

// row 2: source bytes 14..15 -> bytes 0..1
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

// row 3: source bytes 13..15 -> bytes 0..2
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

// row 4: source bytes 12..15 -> bytes 0..3
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

// row 5: source bytes 11..15 -> bytes 0..4
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

// row 6: source bytes 10..15 -> bytes 0..5
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

// row 7: source bytes 9..15 -> bytes 0..6
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

// row 8: source bytes 8..15 -> bytes 0..7
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

// row 9: source bytes 7..15 -> bytes 0..8
DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

// row 10: source bytes 6..15 -> bytes 0..9
DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

// row 11: source bytes 5..15 -> bytes 0..10
DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

// row 12: source bytes 4..15 -> bytes 0..11
DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

// row 13: source bytes 3..15 -> bytes 0..12
DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

// row 14: source bytes 2..15 -> bytes 0..13
DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

// row 15: source bytes 1..15 -> bytes 0..14
DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// memeq reports whether the size bytes at a and at b are identical.
// It only loads the registers memeqbody expects and tail-jumps to it;
// the boolean result is written to ret+12(FP).
TEXT runtime·memeq(SB),NOSPLIT,$0-13
	LEAL	ret+12(FP), AX	// AX = address of the result byte
	MOVL	size+8(FP), BX	// BX = byte count
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// The byte count is not an argument: it is read from offset 4 of the
// closure addressed by DX. Identical pointers short-circuit to true.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	sameptr
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
sameptr:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// Only s1's length is read; identical data pointers short-circuit to true.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	identical
	MOVL	s1len+4(FP), BX
	LEAL	v+16(FP), AX
	JMP	runtime·memeqbody(SB)
identical:
	MOVB	$1, v+16(FP)
	RET

// bytes·Equal reports whether slices a and b hold the same bytes.
// Differing lengths give false immediately; otherwise the data is
// compared by memeqbody, which writes the result to ret+24(FP).
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	lenmismatch
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
lenmismatch:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody is the shared comparison kernel. Register contract:
// a in SI
// b in DI
// count in BX
// address of result byte in AX
// It stores 1 through AX when the two buffers match and 0 otherwise.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	under4

	// 64 bytes at a time using xmm registers
loop64:
	CMPL	BX, $64
	JB	loop4
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Byte-wise equality per 16-byte pair, then AND the four results.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB	X0, DX	// DX = one bit per byte; 0xffff means all 64 matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	loop64
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
loop4:
	CMPL	BX, $4
	JBE	tail4
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	loop4
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end at the end of each buffer; this window
	// may overlap bytes that were already compared.
tail4:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// Fewer than 4 bytes in total.
under4:
	TESTL	BX, BX
	JEQ	storeflag	// zero length: ZF is set, so the result is "equal"

	// CX = -8*count. Shifts use only the low 5 bits of the count, so
	// shifting by CX moves 32-8*count bit positions.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	a_nearend

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	a_loaded
a_nearend:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
a_loaded:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	b_nearend
	MOVL	(DI), DI
	JMP	b_loaded
b_nearend:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
b_loaded:

	// Shifting the difference left drops the bytes beyond count, so ZF
	// ends up set exactly when the low count bytes agree.
	SUBL	SI, DI
	SHLL	CX, DI
storeflag:
	SETEQ	(AX)
	RET

// cmpstring compares two strings. It loads the registers cmpbody
// expects and tail-jumps to it; the result word is written to ret+16(FP).
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare compares two byte slices through cmpbody. The loads at
// offsets 0 and 12 are the data pointers and those at 4 and 16 are the
// lengths; the result word is written to ret+24(FP).
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte stores in ret+16(FP) the index of the first byte equal
// to c within the s_len bytes at s, or -1 when there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD
	REPN
	SCASB			// scan forward from DI for AL, at most CX bytes
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	// DI points one past the matching byte, so the index is DI-SI-1.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte stores in ret+12(FP) the index of the first byte equal
// to c within the s_len bytes at s, or -1 when there is none.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD
	REPN
	SCASB			// scan forward from DI for AL, at most CX bytes
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	// DI points one past the matching byte, so the index is DI-SI-1.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
// The bytes the two inputs have in common are compared first; when they
// all match, the result is decided by the lengths alone.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX		// DX = blen-alen
	CMOVLGT	BX, BP		// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	lencmp		// same pointer: only the lengths can differ
	CMPL	BP, $4
	JB	under4
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	loop4

	// 16 bytes at a time using xmm registers
loop16:
	CMPL	BP, $16
	JB	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB	X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	mismatch16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	loop16

mismatch16:
	BSFL	BX, BX		// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX		// 1 when a's byte is the larger one (unsigned)
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

	// 4 bytes at a time using 32-bit registers
loop4:
	CMPL	BP, $4
	JBE	tail4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	mismatch4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	loop4

	// 0 to 4 common bytes remain: compare the 4 bytes that end at the end
	// of the common prefix (may overlap bytes already compared).
tail4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	lencmp

mismatch4:
	BSWAPL	BX		// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX		// find bit differences
	BSRL	CX, CX		// index of highest bit difference
	SHRL	CX, BX		// move a's bit to bottom
	ANDL	$1, BX		// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
under4:
	// CX = -8*BP. NEGL sets ZF when BP is 0, i.e. nothing to compare.
	// Shifts use only the low 5 bits of the count, so shifting by CX
	// moves 32-8*BP bit positions.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	lencmp

	// load si
	// When the low address byte is above 0xfc, read the 4 bytes ending
	// at the end of the data instead of the 4 bytes starting at SI.
	CMPB	SI, $0xfc
	JA	a_nearend
	MOVL	(SI), SI
	JMP	a_loaded
a_nearend:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
a_loaded:
	SHLL	CX, SI		// push the wanted bytes to the top, zero the rest

	// same for di
	CMPB	DI, $0xfc
	JA	b_nearend
	MOVL	(DI), DI
	JMP	b_loaded
b_nearend:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
b_loaded:
	SHLL	CX, DI

	BSWAPL	SI		// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI		// find bit differences
	JEQ	lencmp
	BSRL	DI, CX		// index of highest bit difference
	SHRL	CX, SI		// move a's bit to bottom
	ANDL	$1, SI		// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
lencmp:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX		// DX still holds blen-alen
	SETLT	BX		// 1 if alen > blen
	SETEQ	CX		// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand1 advances the per-m generator state m.fastrand and returns the
// new value in ret+0(FP). The state is doubled; when the doubled value has
// its top bit set it is additionally XORed with 0x88888eef.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX		// AX = g.m
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX			// DX = 2*x
	MOVL	DX, BX			// BX keeps the plain doubled value
	XORL	$0x88888eef, DX
	// The constant has its top bit set, so the XOR result is negative
	// exactly when 2*x was not; in that case restore the plain value.
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 puts 0 in AX and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Only CX and AX are written; the result is left in AX.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	// The byte layout matters here: one NOP byte, the CALL, one NOP byte.
	// The leading NOP presumably supplies the PCQuantum offset that the
	// return address points at (TODO confirm PCQuantum for this
	// architecture).
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0 issues a PREFETCHT0 hint for the address passed in
// addr+0(FP). It has no result.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues a PREFETCHT1 hint for the address passed in
// addr+0(FP). It has no result.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


// prefetcht2 issues a PREFETCHT2 hint for the address passed in
// addr+0(FP). It has no result.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues a PREFETCHNTA hint for the address passed in
// addr+0(FP). It has no result.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET