// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the bootstrap sequence for the first thread.
// In order, it:
//   - saves argc/argv in a 16-byte-aligned scratch area,
//   - gives g0 provisional stack bounds,
//   - records CPUID results (vendor check and leaf-1 feature words),
//   - sets up TLS, via _cgo_init when present and ldt0setup otherwise,
//   - links g0 and m0 to each other,
//   - calls check, args, osinit and schedinit,
//   - queues runtime·main with newproc and calls mstart.
// The INT $3 at the end is reached only if mstart returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	// CPUID leaf 0: AX = highest supported leaf, BX/DX/CX = vendor string.
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// CPUID leaf 1: keep the CX and DX result words for later feature tests.
	MOVL	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	// Arguments to _cgo_init: 0(SP) = g0 (still in BP), 4(SP) = setg_gcc.
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok
needtls:
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·tls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(CX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// mainPC is a 4-byte read-only word holding the address of runtime·main.
// rt0_go pushes the address of this word as newproc's entry argument.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes a single INT $3 (breakpoint trap) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word 0x27F via a temporary stack slot.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL	$0x27F
	FLDCW	0(SP)
	POPL	AX	// discard the temporary; restores SP
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// On the jump to gobuf_pc, AX holds gobuf_ret and DX holds gobuf_ctxt.
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument to fn: the g we switched away from
	MOVL	DI, DX	// DX = fn (closure context)
	MOVL	0(DI), DI	// first word of the func value is the code pointer
	CALL	DI
	// fn returned, which it must never do: report through badmcall2.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the system (g0) stack.
// If the current g is already gsignal or g0, fn is called directly.
// If it is m->curg, the routine saves its state in g->sched, switches
// to g0's stack, calls fn, then switches back to m->curg.
// Any other g is reported through badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// badsystemstack must not return. If it did, execution would fall
	// into the switch path below with AX holding a code address instead
	// of g, and g->sched would be written through it. Trap instead.
	INT	$3

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack_noctxt clears DX, which morestack stores as g->sched.ctxt,
// and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias symbol that tail-jumps to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and jumps to the smallest callNNN
// whose fixed frame size is >= argsize. Sizes above 1073741824 go to
// badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper. Each helper:
//   1. copies argsize bytes from argptr into its own frame,
//   2. calls the function whose code pointer is the first word of f,
//   3. copies the bytes from retoffset onward back to argptr,
//   4. calls callwritebarrier(argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX	// CMPXCHGL compares the memory operand against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// The uintptr/uint variants below tail-jump to the 32-bit routines.

TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
	JMP	runtime·cas(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8
	JMP	runtime·atomicstore(SB)

// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-21
	MOVL	ptr+0(FP), BP
	// CMPXCHG8B compares DX:AX with memory and stores CX:BX on a match.
	MOVL	old_lo+4(FP), AX
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	SETEQ	ret+20(FP)
	RET

// bool casp1(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX	// keep delta: XADDL leaves the old *val in AX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX	// old value + delta = new value
	MOVL	AX, ret+8(FP)
	RET

// xchg stores new into *ptr and returns the previous contents.
TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

// xchgp1 has the same body as xchg.
TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

// void procyield(uint32 cycles)
// Executes PAUSE cycles times.
// cycles must be nonzero: the counter is decremented before it is
// tested, so a zero count wraps around and spins 2^32 times.
// The argument size is declared as 4 bytes to match the cycles+0(FP) read.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep1 stores val into *ptr with XCHGL; the old value is discarded.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore has the same body as atomicstorep1.
TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// uint64 atomicload64(uint64 volatile* addr);
// addr must be 8-byte aligned; otherwise the routine crashes deliberately.
TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX
	JZ	2(PC)
	MOVL	0, AX // crash with nil ptr deref
	LEAL	ret_lo+4(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET

// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
// addr must be 8-byte aligned; otherwise the routine crashes deliberately.
TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX
	JZ	2(PC)
	MOVL	0, AX // crash with nil ptr deref
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// This is essentially a no-op, but it provides required memory fencing.
	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved across the call.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// asmcgocall(void(*fn)(void*), void *arg)
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-8
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX
	CALL	asmcgocall<>(SB)
	RET

// asmcgocall_errno is asmcgocall plus a result: whatever is in AX after
// the call is stored into ret.
TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX
	CALL	asmcgocall<>(SB)
	MOVL	AX, ret+8(FP)
	RET

// asmcgocall<> is the shared body of the two entry points above.
// Unless already on m->g0, it saves the caller's state with gosave<>
// and switches to g0's stack. It then calls fn(arg) on a 16-byte-aligned
// frame and afterwards restores g and SP.
TEXT asmcgocall<>(SB),NOSPLIT,$0-0
	// fn in AX, arg in BX
	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	4(PC)
	CALL	gosave<>(SB)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	// SP is recomputed as g->stack.hi minus the saved depth.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX	// DX = oldm slot; tested against 0 before the dropm call below
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	// On Windows, 0x14(FS) is pointed at m->tls, or cleared when gg is nil.
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Traps with INT $3 when either bound check fails.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the word stored 4 bytes below argp, which is the
// return PC of the function whose first argument lives at argp.
TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the return PC stored 4 bytes below argp with pc.
TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	BX, -4(AX)		// set calling pc
	RET

// getcallersp returns argp unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Returns the RDTSC counter. When the SSE2 bit is set in cpuid_edx, a
// fence is issued first: LFENCE if lfenceBeforeRdtsc is 1, MFENCE otherwise.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

// ldt0setup calls setldt(7, &tls0, 32).
TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at tls0
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared with flags 0 rather than
// NOSPLIT; rt0_go calls it with the note "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort raises a breakpoint trap and never returns.
// If execution is resumed after the trap (for example by a debugger),
// it spins here instead of running into the code of the next function.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
spin:
	JMP	spin

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size: second word of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX	// memhash result
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// aeshash and aeshashstr load AX/CX/DX as documented on aeshashbody
// and tail-jump to it; aeshashbody reads the seed from h+4(FP).
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), CX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), CX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X6	// seed to low 64 bits of xmm6
	PINSRD	$2, CX, X6	// size to high 64 bits of xmm6
	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
	MOVO	runtime·aeskeysched(SB), X7
	// Dispatch on length: 0-15, 16, 17-32, 33-64, 65+.
	CMPL	CX, $16
	JB	aes0to15
	JE	aes16
	CMPL	CX, $32
	JBE	aes17to32
	CMPL	CX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	CX, CX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X0
	ADDL	CX, CX	// CX*2 with the *8 scale below = 16-byte table entries
	PAND	masks<>(SB)(CX*8), X0

	// scramble 3 times
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

endofpage:
	// address ends in 1111xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X0
	ADDL	CX, CX
	PSHUFB	shifts<>(SB)(CX*8), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

aes0:
	// return input seed
	MOVL	h+4(FP), AX
	MOVL	AX, (DX)
	RET

aes16:
	MOVOU	(AX), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET


aes17to32:
	// load data to be hashed
	MOVOU	(AX), X0
	MOVOU	-16(AX)(CX*1), X1

	// scramble 3 times
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X0
	AESENC	X7, X1

	// combine results
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes33to64:
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes65plus:
	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(CX*1), X0
	MOVOU	-48(AX)(CX*1), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	// scramble state once
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3

	// compute number of remaining 64-byte blocks
	DECL	CX
	SHRL	$6, CX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	AESENC	X4, X0
	AESENC	X5, X1
	MOVOU	32(AX), X4
	MOVOU	48(AX), X5
	AESENC	X4, X2
	AESENC	X5, X3

	// scramble state
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	ADDL	$64, AX
	DECL	CX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h: three AESENC rounds
// keyed from aeskeysched, returning the low 32 bits of the result.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h, using the same three
// rounds as aeshash32.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// Entry n of masks<> (at byte offset n*16, n = 0..15) has its low n bytes
// set to 0xff and the remaining bytes zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// A control byte of 0xff has its high bit set, which makes PSHUFB
// write zero to the corresponding destination byte.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// memeq reports whether the size bytes at a and at b are equal.
// It loads the arguments into memeqbody's register convention
// (SI, DI, BX) and stores the low byte of AX as the result.
TEXT runtime·memeq(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// The length is not an argument; it is read from offset 4 of the
// closure addressed by DX. Identical pointers compare equal without
// touching memory.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// Only s1's length is read; identical data pointers
// compare equal without touching memory.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+16(FP)
	RET
same:
	MOVB	$1, v+16(FP)
	RET

// bytes·Equal reports whether slices a and b hold the same bytes.
// AX is cleared before the length comparison, so slices of
// different length return false without calling memeqbody.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// memeqbody compares two byte ranges of equal length.
// input:
//   a in SI
//   b in DI
//   count in BX
// output:
//   AX = 1 if the ranges are equal, 0 otherwise
// Modifies BX, CX, DX, SI, DI, X0-X7 and the flags.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORL	AX, AX	// result defaults to 0; the mismatch paths just RET

	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB	X0, DX	// one bit per byte lane; 0xffff means all 64 bytes matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end at SI+BX / DI+BX. This path is only
	// reached when the original count was at least 4, so the load may
	// overlap bytes that were already compared.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	AX
	RET

small:
	CMPL	BX, $0
	JEQ	equal	// ZF is set here, so SETEQ at equal yields 1

	// CX = -8*count. Shifts use only the low 5 bits of CX,
	// so the effective shift is 32 - 8*count bits.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// The difference is zero in its low count bytes iff the inputs match;
	// the left shift discards the unwanted high bytes and sets ZF.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	AX
	RET

// cmpstring performs a three-way comparison of two strings.
// It loads both base pointers and lengths into cmpbody's register
// convention and stores AX as the result.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare performs a three-way comparison of two byte slices.
// It loads both data pointers and lengths into cmpbody's register
// convention and stores AX as the result.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI	// s1 data pointer
	MOVL	s1+4(FP), BX	// s1 length
	MOVL	s2+12(FP), DI	// s2 data pointer
	MOVL	s2+16(FP), DX	// s2 length
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+24(FP)
	RET

// bytes·IndexByte returns the index of the first byte equal to c in s,
// or -1 if there is none.
// When s_len is 0, REPN SCASB executes no iterations and leaves the flags
// as they were on entry; both exits then store -1, because DI still
// equals SI on the "found" path.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB
	JZ	3(PC)	// found: skip the two-instruction "not found" exit
	MOVL	$-1, ret+16(FP)
	RET
	SUBL	SI, DI
	SUBL	$1, DI	// SCASB leaves DI one past the matching byte
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte returns the index of the first byte equal to c in s,
// or -1 if there is none.
// When s_len is 0, REPN SCASB executes no iterations and leaves the flags
// as they were on entry; both exits then store -1, because DI still
// equals SI on the "found" path.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB
	JZ	3(PC)	// found: skip the two-instruction "not found" exit
	MOVL	$-1, ret+12(FP)
	RET
	SUBL	SI, DI
	SUBL	$1, DI	// SCASB leaves DI one past the matching byte
	MOVL	DI, ret+12(FP)
	RET

// cmpbody performs a three-way lexicographic comparison of two byte ranges.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
// Modifies BX, CX, BP, SI, DI, X0, X1 and the flags.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPL	SI, DI
	JEQ	allsame
	CMPL	BX, DX
	MOVL	DX, BP
	CMOVLLT	BX, BP	// BP = min(alen, blen)
	CMPL	BP, $4
	JB	small
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	mediumloop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB	X1, AX
	XORL	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	AX, BX	// index of first byte that differs
	XORL	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX
	LEAL	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), AX
	MOVL	(DI), CX
	CMPL	AX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// Compare the 4 bytes that end at SI+BP / DI+BP. This path is only
	// reached when at least 4 bytes were in common, so the load may
	// overlap bytes that were already compared.
_0through4:
	MOVL	-4(SI)(BP*1), AX
	MOVL	-4(DI)(BP*1), CX
	CMPL	AX, CX
	JEQ	allsame

diff4:
	BSWAPL	AX	// reverse order of bytes
	BSWAPL	CX
	XORL	AX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, AX	// move a's bit to bottom
	ANDL	$1, AX	// mask bit
	LEAL	-1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX
	NEGL	CX	// CX = -8*BP; sets ZF when BP == 0
	JEQ	allsame

	// load si
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI	// keep only the BP wanted bytes, now in the high end

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	allsame
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), AX	// 1/0 => +1/-1
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	AX, AX
	XORL	CX, CX
	CMPL	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// fastrand1 advances the generator state stored in the current m
// (m_fastrand) and returns the new value.
// The update is x += x; if bit 31 of the doubled value is set, x ^= 0x88888eef.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX	// keep the doubled, un-XORed value
	// The constant has bit 31 set, so after the XOR the sign flag is the
	// inverse of bit 31 of the doubled value. CMOVLMI therefore restores
	// the un-XORed value exactly when that bit was clear.
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 returns with AX set to 0.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0 issues a PREFETCHT0 hint for the memory at addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues a PREFETCHT1 hint for the memory at addr.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

// prefetcht2 issues a PREFETCHT2 hint for the memory at addr.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues a PREFETCHNTA hint for the memory at addr.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET