// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "zasm_GOOS_GOARCH.h"

// _rt0_386 is the 386 startup routine. It stashes argc/argv, gives g0
// default stack bounds, records CPUID feature bits, makes sure TLS works
// (through _cgo_init when present, otherwise through ldt0setup), installs
// m0/g0, runs the runtime initialisation calls, queues runtime·main with
// newproc and finally calls mstart.
TEXT _rt0_386(SB),7,$0
	// Move argc/argv into a 16-byte-aligned scratch area, because SP
	// is about to be moved away from the entry frame.
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// generous scratch area
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// argc
	MOVL	BX, 124(SP)		// argv

	// Default stack bounds for g0.
	// _cgo_init may later overwrite stackguard.
	LEAL	runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	SP, g_stackbase(BP)
	MOVL	BX, g_stackguard(BP)

	// Probe the processor: CPUID leaf 0 returns the highest supported
	// leaf in AX; if it is non-zero, record the ECX/EDX words of leaf 1.
	XORL	AX, AX
	CPUID
	TESTL	AX, AX
	JZ	nocpuinfo
	MOVL	$1, AX
	CPUID
	MOVL	DX, runtime·cpuid_edx(SB)
	MOVL	CX, runtime·cpuid_ecx(SB)
nocpuinfo:

	// When _cgo_init is present, call it so that it can initialise
	// itself and set up GS; otherwise GS is set up below.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	BP, 0(SP)		// first argument: BP, still &runtime·g0
	MOVL	$setmg_gcc<>(SB), BX
	MOVL	BX, 4(SP)		// second argument: setmg_gcc
	CALL	AX
	// After _cgo_init, only Windows still needs runtime·ldt0setup(SB)
	// and the TLS self-test.
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok
needtls:
	// Plan 9 never needs runtime·ldt0setup(SB) or the TLS self-test.
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// Self-test: a store through TLS must be visible in tls0.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·tls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort: store to address 0
ok:
	// Install the m and g "registers".
	get_tls(BX)
	MOVL	$runtime·g0(SB), CX
	MOVL	CX, g(BX)
	MOVL	$runtime·m0(SB), AX
	MOVL	AX, m(BX)

	// m->g0 = g0
	MOVL	CX, m_g0(AX)

	CALL	runtime·emptyfunc(SB)	// faults here if the stack check is wrong

	// By convention the direction flag is always clear.
	CLD

	CALL	runtime·check(SB)

	// Hand the saved argc/argv to runtime·args.
	MOVL	120(SP), AX
	MOVL	124(SP), BX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·hashinit(SB)
	CALL	runtime·schedinit(SB)

	// Create the goroutine that will start the program.
	PUSHL	$runtime·main·f(SB)	// entry
	PUSHL	$0			// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// Start running this M.
	CALL	runtime·mstart(SB)

	INT $3
	RET

// One-word cell holding the address of runtime·main; its address is what
// _rt0_386 pushes as the entry argument to newproc.
DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·main·f(SB),8,$4

// breakpoint executes a debugger trap and returns.
TEXT runtime·breakpoint(SB),7,$0
	INT $3
	RET

// asminit loads the x87 control word 0x27F.
TEXT runtime·asminit(SB),7,$0
	// Linux and MinGW start the FPU in extended double precision,
	// other operating systems in double precision. Switch to double
	// precision so that all of them agree, and to match hardware
	// that only has double.
	PUSHL $0x27F
	FLDCW	0(SP)
	POPL AX
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// setjmp-style: record the caller's PC, SP and the current g in the Gobuf.
TEXT runtime·gosave(SB), 7, $0
	MOVL	4(SP), AX		// gobuf
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	LEAL	4(SP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*, uintptr)
// longjmp-style: make gobuf.g current, switch to gobuf.sp and jump to
// gobuf.pc with the second argument in AX.
TEXT runtime·gogo(SB), 7, $0
	MOVL	8(SP), AX		// second argument becomes the return value
	MOVL	4(SP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// faults if g == nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// void gogocall(Gobuf*, void (*fn)(void), uintptr r0)
// Restore the state in Gobuf, then run fn so that it returns
// to gobuf.pc (the saved PC is pushed as fn's return address).
TEXT runtime·gogocall(SB), 7, $0
	MOVL	4(SP), BX		// gobuf
	MOVL	8(SP), AX		// fn
	MOVL	12(SP), DX		// context
	MOVL	gobuf_g(BX), DI
	get_tls(CX)
	MOVL	DI, g(CX)
	MOVL	0(DI), CX		// faults if g == nil
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_pc(BX), BX
	PUSHL	BX			// fn returns to gobuf.pc
	JMP	AX
	POPL	BX	// not reached

// void gogocallfn(Gobuf*, FuncVal*)
// Like gogocall, but the target is the code pointer stored in the
// first word of the FuncVal, which stays in DX.
TEXT runtime·gogocallfn(SB), 7, $0
	MOVL	4(SP), BX		// gobuf
	MOVL	8(SP), DX		// fn
	MOVL	gobuf_g(BX), DI
	get_tls(CX)
	MOVL	DI, g(CX)
	MOVL	0(DI), CX		// faults if g == nil
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_pc(BX), BX
	PUSHL	BX			// fn returns to gobuf.pc
	MOVL	0(DX), BX
	JMP	BX
	POPL	BX	// not reached

// void mcall(void (*fn)(G*))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), 7, $0
	MOVL	fn+0(FP), DI

	// Record where to resume the current g in g->sched.
	get_tls(CX)
	MOVL	g(CX), AX
	LEAL	4(SP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Move over to m->g0 and its stack, then call fn(g).
	MOVL	m(CX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX
	JNE	mcall_switch
	CALL	runtime·badmcall(SB)	// already on m->g0
mcall_switch:
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->gobuf.sp
	PUSHL	AX		// argument: the g we came from
	CALL	DI
	POPL	AX
	CALL	runtime·badmcall2(SB)	// fn returned
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// On entry DI holds the frame size, AX the argument size, and DX a
// value that is stored in m->cret.
TEXT runtime·morestack(SB),7,$0
	// The scheduler stack (m->g0) cannot be grown: trap.
	get_tls(CX)
	MOVL	m(CX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	morestack_ok
	INT	$3
morestack_ok:

	MOVL	DX, m_cret(BX)

	// Stash the argument size (AX) and frame size (DI) in m.
	MOVL	AX, m_moreargsize(BX)
	MOVL	DI, m_moreframesize(BX)

	// We were called from f.
	// m->morebuf describes f's caller.
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	MOVL	CX, m_moreargp(BX)
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// m->morepc is f's own PC.
	MOVL	0(SP), AX
	MOVL	AX, m_morepc(BX)

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// Called from reflection library.  Mimics morestack,
// reuses stack growth code to create a frame
// with the desired args running the desired function.
//
// func call(fn *byte, arg *byte, argsize uint32).
TEXT reflect·call(SB), 7, $0
	get_tls(CX)
	MOVL	m(CX), BX

	// m->morebuf gets our caller's PC, SP and g: that is the
	// state to restore when f returns.
	LEAL	4(SP), AX	// our caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	MOVL	0(SP), AX	// our caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	MOVL	g(CX), AX
	MOVL	AX, (m_morebuf+gobuf_g)(BX)

	// Fill in the morestack arguments so that f runs on a new stack.
	// The frame size is set to 1 as a hint to newstack that the
	// request comes from reflect·call. Should f need a larger frame
	// than the default stack, f's usual stack growth prolog will
	// allocate a new segment (and recopy the arguments).
	MOVL	4(SP), AX			// fn
	MOVL	AX, m_morepc(BX)		// f's PC
	MOVL	8(SP), DX			// arg frame
	MOVL	DX, m_moreargp(BX)		// f's argument pointer
	MOVL	12(SP), CX			// arg size
	MOVL	CX, m_moreargsize(BX)		// f's argument size
	MOVL	$1, m_moreframesize(BX)		// f's frame size

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	get_tls(CX)
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1103	// crash if newstack returns
	RET


// Return point when leaving stack.
TEXT runtime·lessstack(SB), 7, $0
	// m->cret = return value (AX)
	get_tls(CX)
	MOVL	m(CX), BX
	MOVL	AX, m_cret(BX)

	// Run oldstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·oldstack(SB)
	MOVL	$0, 0x1004	// crash if oldstack returns
	RET


// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), 7, $0
	MOVL	4(SP), BX	// val
	MOVL	8(SP), AX	// old (CMPXCHG compares against AX)
	MOVL	12(SP), CX	// new
	LOCK
	CMPXCHGL	CX, 0(BX)
	JNZ	cas_fail
	MOVL	$1, AX
	RET
cas_fail:
	MOVL	$0, AX
	RET

// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
//	if(*val == *old){
//		*val = new;
//		return 1;
//	} else {
//		*old = *val
//		return 0;
//	}
TEXT runtime·cas64(SB), 7, $0
	MOVL	4(SP), BP	// val
	MOVL	12(SP), BX	// new, low word
	MOVL	16(SP), CX	// new, high word
	MOVL	8(SP), SI	// old
	MOVL	0(SI), AX	// *old, low word
	MOVL	4(SI), DX	// *old, high word
	LOCK
	CMPXCHG8B	0(BP)
	JNZ	cas64_fail
	MOVL	$1, AX
	RET
cas64_fail:
	// DX:AX now holds the value found in *val; report it through old.
	MOVL	AX, 0(SI)
	MOVL	DX, 4(SI)
	MOVL	$0, AX
	RET

// bool casp(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp(SB), 7, $0
	MOVL	4(SP), BX	// p
	MOVL	8(SP), AX	// old (CMPXCHG compares against AX)
	MOVL	12(SP), CX	// new
	LOCK
	CMPXCHGL	CX, 0(BX)
	JNZ	casp_fail
	MOVL	$1, AX
	RET
casp_fail:
	MOVL	$0, AX
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), 7, $0
	MOVL	4(SP), BX	// val
	MOVL	8(SP), AX	// delta
	MOVL	AX, CX		// keep a copy: XADD replaces AX with the old *val
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX		// old value + delta = new value
	RET

// xchg stores the second argument into the word addressed by the first
// and returns, in AX, the value that was there before.
TEXT runtime·xchg(SB), 7, $0
	MOVL	4(SP), BX
	MOVL	8(SP), AX
	XCHGL	AX, 0(BX)
	RET

// procyield executes PAUSE in a loop driven by its first argument.
// The counter is tested after the decrement, so the body runs at
// least once.
TEXT runtime·procyield(SB),7,$0
	MOVL	4(SP), AX	// iteration count
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep stores the second argument into the word addressed by
// the first, using XCHGL.
TEXT runtime·atomicstorep(SB), 7, $0
	MOVL	4(SP), BX
	MOVL	8(SP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore stores the second argument into the word addressed by
// the first, using XCHGL.
TEXT runtime·atomicstore(SB), 7, $0
	MOVL	4(SP), BX
	MOVL	8(SP), AX
	XCHGL	AX, 0(BX)
	RET

// uint64 atomicload64(uint64 volatile* addr);
// so actually
// void atomicload64(uint64 *res, uint64 volatile *addr);
TEXT runtime·atomicload64(SB), 7, $0
	MOVL	4(SP), BX	// res
	MOVL	8(SP), AX	// addr
	// The 64-bit copy goes through MM0; the instructions are
	// emitted as raw bytes.
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET

// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtime·atomicstore64(SB), 7, $0
	MOVL	4(SP), AX	// addr
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// Adding zero to the top of the stack changes nothing, but the
	// LOCKed instruction supplies the required memory fence.
	// MFENCE would also do, but it only exists from the Pentium4 (SSE2) on.
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), 7, $0
	MOVL	4(SP), DX	// fn
	MOVL	8(SP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp as it was right after its CALL
	SUBL	$5, (SP)	// rewind the return address so the CALL runs again
	MOVL	0(DX), BX	// code pointer is the first word of fn
	JMP	BX	// but first run the deferred function

// Dummy function to use in saved gobuf.PC,
// to match SP pointing at a return address.
// The gobuf.PC is unused by the contortions here
// but setting it to return will make the traceback code work.
TEXT return<>(SB),7,$0
	RET

// asmcgocall(void(*fn)(void*), void *arg)
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.c for more details.
TEXT runtime·asmcgocall(SB),7,$0
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX
	MOVL	SP, DX		// remember the incoming SP; restored before RET

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	m(CX), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	6(PC)		// already on g0: skip the five-instruction switch
	MOVL	SP, (g_sched+gobuf_sp)(DI)
	MOVL	$return<>(SB), (g_sched+gobuf_pc)(DI)
	MOVL	DI, (g_sched+gobuf_g)(DI)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	DX, 4(SP)	// save SP
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	DI, g(CX)
	MOVL	4(SP), SP
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),7,$12
	LEAL	fn+0(FP), AX	// address of the fn argument slot
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$12
	// If m is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// A zero TLS base is treated like a nil m: push 0 as the saved
	// m and go straight to needm.
	CMPL	CX, $0
	JNE	3(PC)
	PUSHL	$0
	JMP needm
#endif
	MOVL	m(CX), BP
	PUSHL	BP		// saved m from entry; popped again before dropm
	CMPL	BP, $0
	JNE	havem
needm:
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	get_tls(CX)
	MOVL	m(CX), BP

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	MOVL	m_g0(BP), SI
	PUSHL	(g_sched+gobuf_sp)(SI)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg
	// with the three arguments.  Because we are taking over
	// the execution of m->curg but *not* resuming what had
	// been running, we need to save that information (m->curg->gobuf)
	// so that we can restore it when we're done.
	// We can restore m->curg->gobuf.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->gobuf.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because we defined cgocallbackg to have
	// a frame size of 12, the same amount that we use below),
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	MOVL	fn+0(FP), AX
	MOVL	frame+4(FP), BX
	MOVL	framesize+8(FP), DX

	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI

	// Push gobuf.pc
	MOVL	(g_sched+gobuf_pc)(SI), BP
	SUBL	$4, DI
	MOVL	BP, 0(DI)

	// Push arguments to cgocallbackg.
	// Frame size here must match the frame size above
	// to trick traceback routines into doing the right thing.
	SUBL	$12, DI
	MOVL	AX, 0(DI)
	MOVL	BX, 4(DI)
	MOVL	DX, 8(DI)

	// Switch stack and make the call.
	MOVL	DI, SP
	CALL	runtime·cgocallbackg(SB)

	// Restore g->gobuf (== m->curg->gobuf) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP		// the gobuf.pc pushed above the 12-byte frame
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI		// SP before the frame and the pc were pushed
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	m(CX), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	POPL	(g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	POPL	BP
	CMPL	BP, $0
	JNE 3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setmg(M*, G*); set m and g. for use by needm.
TEXT runtime·setmg(SB), 7, $0
#ifdef GOOS_windows
	// On Windows the TLS base lives at 0x14(FS): clear it for a nil m
	// (and return), otherwise point it at m->tls.
	MOVL	mm+0(FP), AX
	CMPL	AX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	mm+0(FP), AX
	MOVL	AX, m(CX)
	MOVL	gg+4(FP), BX
	MOVL	BX, g(CX)
	RET

// void setmg_gcc(M*, G*); set m and g. for use by gcc
TEXT setmg_gcc<>(SB), 7, $0
	get_tls(AX)
	MOVL	mm+0(FP), DX
	MOVL	DX, m(AX)
	MOVL	gg+4(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range (g->stackguard, g->stackbase);
// traps with INT $3 when it is not.
TEXT runtime·stackcheck(SB), 7, $0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	g_stackbase(AX), SP
	JHI	2(PC)		// ok when stackbase > SP
	INT	$3
	CMPL	SP, g_stackguard(AX)
	JHI	2(PC)		// ok when SP > stackguard
	INT	$3
	RET

// memclr zeroes count bytes starting at addr: count/4 words with
// REP STOSL, then the remaining count%4 bytes with REP STOSB.
TEXT runtime·memclr(SB),7,$0
	MOVL	4(SP), DI		// arg 1 addr
	MOVL	8(SP), CX		// arg 2 count
	MOVL	CX, BX
	ANDL	$3, BX			// BX = trailing byte count
	SHRL	$2, CX			// CX = word count
	MOVL	$0, AX
	CLD
	REP
	STOSL
	MOVL	BX, CX
	REP
	STOSB
	RET

// getcallerpc takes the address of its caller's first argument and
// returns the word stored just below it.
TEXT runtime·getcallerpc(SB),7,$0
	MOVL	x+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	RET

// setcallerpc takes the address of its caller's first argument and a
// new pc, and overwrites the word stored just below that address.
TEXT runtime·setcallerpc(SB),7,$0
	MOVL	x+0(FP),AX		// addr of first arg
	MOVL	x+4(FP), BX
	MOVL	BX, -4(AX)		// set calling pc
	RET

// getcallersp returns its argument unchanged.
TEXT runtime·getcallersp(SB), 7, $0
	MOVL	sp+0(FP), AX
	RET

// int64 runtime·cputicks(void), so really
// void runtime·cputicks(int64 *ticks)
TEXT runtime·cputicks(SB),7,$0
	RDTSC
	MOVL	ret+0(FP), DI
	MOVL	AX, 0(DI)		// low word of the counter
	MOVL	DX, 4(DI)		// high word of the counter
	RET

// ldt0setup calls runtime·setldt(7, &runtime·tls0, 32).
TEXT runtime·ldt0setup(SB),7,$16
	// set up ldt 7 to point at tls0
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing; _rt0_386 calls it so that a wrong stack
// check faults early.
TEXT runtime·emptyfunc(SB),0,$0
	RET

// abort traps with INT $3 and never returns.
TEXT runtime·abort(SB),7,$0
	INT $0x3
abort_spin:
	// Only reached if execution is resumed past the trap. Spin here
	// instead of falling through into whatever code follows.
	JMP	abort_spin

// stackguard stores the current SP into its first argument slot and
// g->stackguard into its second.
TEXT runtime·stackguard(SB),7,$0
	MOVL	SP, DX
	MOVL	DX, sp+0(FP)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_stackguard(BX), DX
	MOVL	DX, limit+4(FP)
	RET

// 32-byte TLS area handed to setldt by ldt0setup.
GLOBL runtime·tls0(SB), $32

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),7,$0
	MOVL	4(SP), DX	// ptr to hash value
	MOVL	8(SP), CX	// size
	MOVL	12(SP), AX	// ptr to data
	JMP	runtime·aeshashbody(SB)

// aeshashstr hashes a string: it unpacks the data pointer and length
// from the string struct and continues in aeshashbody.
TEXT runtime·aeshashstr(SB),7,$0
	MOVL	4(SP), DX	// ptr to hash value
	MOVL	12(SP), AX	// ptr to string struct
	MOVL	4(AX), CX	// length of string
	MOVL	(AX), AX	// string data
	JMP	runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: ptr to seed input / hash output
TEXT runtime·aeshashbody(SB),7,$0
	MOVL	(DX), X0	// seed to low 32 bits of xmm0
	PINSRD	$1, CX, X0	// size to next 32 bits of xmm0
	MOVO	runtime·aeskeysched+0(SB), X2
	MOVO	runtime·aeskeysched+16(SB), X3
aesloop:
	// Consume the input 16 bytes at a time.
	CMPL	CX, $16
	JB	aesloopend
	MOVOU	(AX), X1
	AESENC	X2, X0
	AESENC	X1, X0
	SUBL	$16, CX
	ADDL	$16, AX
	JMP	aesloop
aesloopend:
	TESTL	CX, CX
	JE	finalize	// no partial block

	TESTL	$16, AX
	JNE	highpartial

	// address ends in 0xxxx.  16 bytes loaded
	// at this address won't cross a page boundary, so
	// we can load it directly.
	MOVOU	(AX), X1
	ADDL	CX, CX		// with the *8 scale below: 16 bytes per table row
	PAND	masks(SB)(CX*8), X1
	JMP	partial
highpartial:
	// address ends in 1xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-16(AX)(CX*1), X1
	ADDL	CX, CX		// with the *8 scale below: 16 bytes per table row
	PSHUFB	shifts(SB)(CX*8), X1
partial:
	// incorporate partial block into hash
	AESENC	X3, X0
	AESENC	X1, X0
finalize:
	// finalize hash
	AESENC	X2, X0
	AESENC	X3, X0
	AESENC	X2, X0
	MOVL	X0, (DX)
	RET

// aeshash32 hashes the 32-bit value at the data pointer, combined with
// the seed read from the hash pointer; the result is stored back there.
TEXT runtime·aeshash32(SB),7,$0
	MOVL	4(SP), DX	// ptr to hash value
	MOVL	12(SP), AX	// ptr to data
	MOVL	(DX), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVL	X0, (DX)
	RET

// aeshash64 hashes the 64-bit value at the data pointer, combined with
// the seed read from the hash pointer; the result is stored back there.
TEXT runtime·aeshash64(SB),7,$0
	MOVL	4(SP), DX	// ptr to hash value
	MOVL	12(SP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, (DX), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVL	X0, (DX)
	RET


// simple mask to get rid of data in the high part of the register.
// Table of sixteen 16-byte rows. Row n has its n lowest bytes set to 0xff
// and the rest zero. aeshashbody indexes it with the remaining length
// (length*16 bytes) as the PAND operand for a partial block.
TEXT masks(SB),7,$0
	// row 0: keep no bytes
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000

	// row 1: keep 1 byte
	LONG $0x000000ff
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000

	// row 2: keep 2 bytes
	LONG $0x0000ffff
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000

	// row 3: keep 3 bytes
	LONG $0x00ffffff
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000

	// row 4: keep 4 bytes
	LONG $0xffffffff
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000

	// row 5: keep 5 bytes
	LONG $0xffffffff
	LONG $0x000000ff
	LONG $0x00000000
	LONG $0x00000000

	// row 6: keep 6 bytes
	LONG $0xffffffff
	LONG $0x0000ffff
	LONG $0x00000000
	LONG $0x00000000

	// row 7: keep 7 bytes
	LONG $0xffffffff
	LONG $0x00ffffff
	LONG $0x00000000
	LONG $0x00000000

	// row 8: keep 8 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x00000000
	LONG $0x00000000

	// row 9: keep 9 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x000000ff
	LONG $0x00000000

	// row 10: keep 10 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x0000ffff
	LONG $0x00000000

	// row 11: keep 11 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x00ffffff
	LONG $0x00000000

	// row 12: keep 12 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x00000000

	// row 13: keep 13 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x000000ff

	// row 14: keep 14 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x0000ffff

	// row 15: keep 15 bytes
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0x00ffffff

// these are arguments to pshufb.  They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Table of sixteen 16-byte PSHUFB control rows. aeshashbody indexes it
// with the remaining length (length*16 bytes). In row n the first n
// control bytes select source bytes 16-n..15 and the rest are 0xff.
TEXT shifts(SB),7,$0
	// row 0
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000
	LONG $0x00000000

	// row 1: move 1 byte down
	LONG $0xffffff0f
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff

	// row 2: move 2 bytes down
	LONG $0xffff0f0e
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff

	// row 3: move 3 bytes down
	LONG $0xff0f0e0d
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff

	// row 4: move 4 bytes down
	LONG $0x0f0e0d0c
	LONG $0xffffffff
	LONG $0xffffffff
	LONG $0xffffffff

	// row 5: move 5 bytes down
	LONG $0x0e0d0c0b
	LONG $0xffffff0f
	LONG $0xffffffff
	LONG $0xffffffff

	// row 6: move 6 bytes down
	LONG $0x0d0c0b0a
	LONG $0xffff0f0e
	LONG $0xffffffff
	LONG $0xffffffff

	// row 7: move 7 bytes down
	LONG $0x0c0b0a09
	LONG $0xff0f0e0d
	LONG $0xffffffff
	LONG $0xffffffff

	// row 8: move 8 bytes down
	LONG $0x0b0a0908
	LONG $0x0f0e0d0c
	LONG $0xffffffff
	LONG $0xffffffff

	// row 9: move 9 bytes down
	LONG $0x0a090807
	LONG $0x0e0d0c0b
	LONG $0xffffff0f
	LONG $0xffffffff

	// row 10: move 10 bytes down
	LONG $0x09080706
	LONG $0x0d0c0b0a
	LONG $0xffff0f0e
	LONG $0xffffffff

	// row 11: move 11 bytes down
	LONG $0x08070605
	LONG $0x0c0b0a09
	LONG $0xff0f0e0d
	LONG $0xffffffff

	// row 12: move 12 bytes down
	LONG $0x07060504
	LONG $0x0b0a0908
	LONG $0x0f0e0d0c
	LONG $0xffffffff

	// row 13: move 13 bytes down
	LONG $0x06050403
	LONG $0x0a090807
	LONG $0x0e0d0c0b
	LONG $0xffffff0f

	// row 14: move 14 bytes down
	LONG $0x05040302
	LONG $0x09080706
	LONG $0x0d0c0b0a
	LONG $0xffff0f0e

	// row 15: move 15 bytes down
	LONG $0x04030201
	LONG $0x08070605
	LONG $0x0c0b0a09
	LONG $0xff0f0e0d

// memeq loads its three arguments (a, b, count) into the registers
// memeqbody expects and tail-jumps to it, so memeqbody's AX result is
// returned directly to memeq's caller.
TEXT runtime·memeq(SB),7,$0
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	count+8(FP), BX
	JMP	runtime·memeqbody(SB)


// bytes·Equal stores 0 in ret when the two lengths differ; otherwise it
// stores the result of memeqbody on the two data pointers.
TEXT bytes·Equal(SB),7,$0
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX			// result if the lengths differ
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// a in SI
// b in DI
// count in BX
// Result in AX: 1 if the count bytes at a and b are equal, else 0.
TEXT runtime·memeqbody(SB),7,$0
	XORL	AX, AX			// result for the early mismatch returns below

	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// one bit per byte lane; 0xffff means all 64 bytes matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET				// mismatch: AX is still 0

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	RET				// mismatch: AX is still 0

	// remaining 0-4 bytes
leftover:
	// Compare the 4 bytes that end at the last byte of each buffer.
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	AX
	RET

small:
	// count is below 4 here.
	CMPL	BX, $0
	JEQ	equal			// ZF is set, so SETEQ at equal yields 1

	// CX = -8*count, used as the shift count below.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx.  Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// Shift the bytes beyond count out of the difference; ZF from the
	// shift then says whether the wanted bytes were equal.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	AX
	RET