// Origin: github.com/mdempsky/go@v0.0.0-20151201204031-5dd372bd1e70/src/runtime/asm_386.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the 386 bootstrap path. In order, it:
//   1. stashes argc/argv on a 16-byte aligned scratch stack,
//   2. gives g0 provisional stack bounds,
//   3. records CPUID results,
//   4. gets TLS working (through _cgo_init if present, else ldt0setup),
//   5. links m0 and g0 together,
//   6. runs check/args/osinit/schedinit,
//   7. queues runtime·main via newproc and calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// Move argc/argv onto an aligned scratch area.
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// generous scratch space
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// argc parked at 120(SP)
	MOVL	BX, 124(SP)		// argv parked at 124(SP)

	// Provisional stack bounds for g0.
	// _cgo_init may replace the stack guard later.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// Ask the processor what it is.
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	cpuinfo_done

	// Decide how RDTSC gets serialized.
	// LFENCE suffices on Intel; AMD needs MFENCE.
	// Anything else is unknown, so it gets MFENCE as well.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	vendor_done
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	vendor_done
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	vendor_done
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
vendor_done:

	MOVL	$1, AX
	CPUID
	MOVL	CX, AX // go through AX: a store to a global clobbers CX under PIC
	MOVL	AX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
cpuinfo_done:

	// When _cgo_init exists, call it so it can initialize
	// and set up GS. Otherwise GS is set up below.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// Recompute the stack guard after _cgo_init.
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// Outside Windows, _cgo_init makes ldt0setup and the TLS probe unnecessary.
	JMP	tls_ok
#endif
needtls:
#ifdef GOOS_plan9
	// Plan 9 never runs ldt0setup or the TLS probe.
	JMP	tls_ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// Probe TLS: write through it and read the value back via m0.tls.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	tls_ok
	MOVL	AX, 0	// abort: store to address 0
tls_ok:
	// Install the m and g "registers".
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// m0.g0 = g0
	MOVL	DX, m_g0(AX)
	// g0.m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// faults here if the stack check is wrong

	// By convention the direction flag stays cleared.
	CLD

	CALL	runtime·check(SB)

	// Hand the parked argc/argv to runtime·args.
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// Queue a new goroutine that will run the program.
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// Start this M.
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// mainPC is a 4-byte read-only word holding the address of runtime·main;
// rt0_go pushes its address as the entry argument for newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes a single INT $3 and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word 0x27F.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision,
	// while other operating systems use double precision.
	// Switch to double precision to match them, and to match
	// other hardware that only has double.
	PUSHL	$0x27F
	FLDCW	0(SP)
	POPL	AX
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// Record the caller's state in the Gobuf (setjmp-like).
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// AX = gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// Resume the state stored in the Gobuf (longjmp-like).
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// BX = gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// dereference g so a nil g faults here
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// cleared to help the garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	// Save the caller's PC, SP and g into g->sched.
	get_tls(DX)
	MOVL	g(DX), AX	// AX = g
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Switch to m->g0 and its stack, then call fn.
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// g == m->g0 means we are already on g0: badmcall
	JNE	mcall_switch
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
mcall_switch:
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g we came from
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI
	// fn is not supposed to return; if it does, report it via badmcall2.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the system stack. If the current g is already gsignal or g0,
// fn is called directly. If it is m->curg, the code switches to g0's stack,
// calls fn and switches back. Any other g is reported via badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// badsystemstack is not expected to return. If it ever did, AX would
	// no longer hold g (it was overwritten with the function address above),
	// and the stores at "switch" would go through that bogus pointer.
	// Trap here instead of falling through.
	INT	$3

switch:
	// Save our state in g->sched. The saved PC is systemstack_switch,
	// so that is what shows up if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// Make it look like mstart called systemstack on g0, to stop traceback.
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	not_g0
	INT	$3
not_g0:

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	not_gsignal
	INT	$3
not_gsignal:

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack_noctxt zeroes DX, the register morestack stores into
// g->sched.ctxt, and then tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflect·call simply forwards to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and runs the DISPATCH chain: the first
// callN whose frame size is >= argsize is jumped to. If argsize exceeds
// the largest size listed, badreflectcall is jumped to instead.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN stamps out one fixed-frame trampoline. Each instance:
//   - byte-copies argsize bytes from argptr into its own frame,
//   - calls through the code pointer stored at f,
//   - byte-copies frame bytes [retoffset, argsize) back to argptr,
//   - calls callwritebarrier(argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* stage the arguments in our frame */	\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* invoke the target */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* hand the results back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* run the write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE, decrementing the cycles argument each time,
// until the counter reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
spin:
	PAUSE
	SUBL	$1, AX
	JNZ	spin
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// x86 already orders stores, so nothing is emitted here;
	// this only serves as a compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// DX = fn (closure)
	MOVL	argp+4(FP), BX	// BX = caller sp
	LEAL	-4(BX), SP	// SP as it was right after the caller's CALL
	SUBL	$5, (SP)	// rewind the return PC so the CALL runs again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
TEXT gosave<>(SB),NOSPLIT,$0
	// AX and BX are used as scratch; preserve them for the caller.
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX			// BX = current g
	LEAL	arg+0(FP), AX			// AX = address of the first argument slot
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX			// word just below it: the return PC
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Runs fn(arg) on the scheduler stack, with the stack
// aligned as the gcc ABI expects.
// cgocall.go has the full story.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX			// DX = SP on entry

	// Decide whether a switch to the m->g0 stack is needed.
	// This routine is also used to create new OS threads, and
	// those calls arrive already running on m->g0.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI		// DI = g on entry
	CMPL	SI, DI
	JEQ	on_g0
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

on_g0:
	// From here on we are on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// remember the entry g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// remember depth in stack, not SP itself: the stack might be copied during a callback
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Put g and the stack pointer back.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI	// stack.hi - depth = entry SP
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX		// address of fn serves as the FuncVal*
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	callback_done
	MOVL	$runtime·dropm(SB), AX
	CALL	AX
callback_done:

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	// On Windows the slot at 0x14(FS) is maintained as well:
	// cleared for a nil g, otherwise pointed at g->m->tls.
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	below_hi
	INT	$3
below_hi:
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	above_lo
	INT	$3
above_lo:
	RET

// getcallerpc returns the return PC stored just below argp. If that slot
// holds stackBarrierPC, the value produced by nextBarrierPC is returned
// instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp with pc.
// If that slot currently holds stackBarrierPC, the new PC is passed to
// setNextBarrierPC instead and the slot is left alone.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), DX
	CMPL	DX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns its argp argument unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Reads the time stamp counter. When the SSE2 bit is set in cpuid_edx, a
// fence is issued first: LFENCE if lfenceBeforeRdtsc is 1, MFENCE otherwise.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT, and rt0_go
// calls it with the note "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort raises a breakpoint trap and must never return.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
	// If the trap is resumed (for example by a debugger), spin here rather
	// than running on into whatever code the linker placed after this symbol.
abort_loop:
	JMP	abort_loop

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX		// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)		// memhash arg 0: p
	MOVL	BX, 4(SP)		// memhash arg 1: h
	MOVL	CX, 8(SP)		// memhash arg 2: size
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX		// memhash result
	MOVL	AX, ret+8(FP)
	RET

// Hash using the AES hardware instructions.
// Loads the registers aeshashbody expects and tail-jumps to it.
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// AX = data pointer
	MOVL	s+8(FP), BX	// BX = size
	LEAL	ret+12(FP), DX	// DX = where the result goes
	JMP	runtime·aeshashbody(SB)

// String variant: unpacks the string header, then tail-jumps to aeshashbody.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// AX = pointer to string object
	MOVL	4(AX), BX	// BX = string length
	MOVL	(AX), AX	// AX = string data
	LEAL	ret+8(FP), DX	// DX = where the result goes
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // keep a copy of the unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // mix in the per-process seed
	AESENC	X0, X0                      // scramble the seed

	// Pick a path by length.
	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	page_tail

	// A 16-byte load at this address cannot cross
	// a page boundary, so load directly and mask.
	MOVOU	-16(AX), X1
	ADDL	BX, BX			// BX*2, then scaled by 8 below: 16-byte table stride
	PAND	masks<>(SB)(BX*8), X1

finish:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

page_tail:
	// address ends in 1111xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	finish

aes0:
	// Zero length: the result is the seed scrambled once more.
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	finish

aes17to32:
	// derive a second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// first 16 and last 16 bytes of the input (they may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// three scramble rounds
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// fold the two lanes together
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// derive three more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 and last 32 bytes of the input (they may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the four lanes together
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// derive three more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold the four lanes together
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 packs the seed h and the 4 data bytes at p into X0, runs three
// AESENC rounds keyed from aeskeysched, and returns the low 32 bits.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// AX = data pointer
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 packs the 8 data bytes at p and the seed h into X0, runs three
// AESENC rounds keyed from aeskeysched, and returns the low 32 bits.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// AX = data pointer
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
1024 DATA masks<>+0x00(SB)/4, $0x00000000 1025 DATA masks<>+0x04(SB)/4, $0x00000000 1026 DATA masks<>+0x08(SB)/4, $0x00000000 1027 DATA masks<>+0x0c(SB)/4, $0x00000000 1028 1029 DATA masks<>+0x10(SB)/4, $0x000000ff 1030 DATA masks<>+0x14(SB)/4, $0x00000000 1031 DATA masks<>+0x18(SB)/4, $0x00000000 1032 DATA masks<>+0x1c(SB)/4, $0x00000000 1033 1034 DATA masks<>+0x20(SB)/4, $0x0000ffff 1035 DATA masks<>+0x24(SB)/4, $0x00000000 1036 DATA masks<>+0x28(SB)/4, $0x00000000 1037 DATA masks<>+0x2c(SB)/4, $0x00000000 1038 1039 DATA masks<>+0x30(SB)/4, $0x00ffffff 1040 DATA masks<>+0x34(SB)/4, $0x00000000 1041 DATA masks<>+0x38(SB)/4, $0x00000000 1042 DATA masks<>+0x3c(SB)/4, $0x00000000 1043 1044 DATA masks<>+0x40(SB)/4, $0xffffffff 1045 DATA masks<>+0x44(SB)/4, $0x00000000 1046 DATA masks<>+0x48(SB)/4, $0x00000000 1047 DATA masks<>+0x4c(SB)/4, $0x00000000 1048 1049 DATA masks<>+0x50(SB)/4, $0xffffffff 1050 DATA masks<>+0x54(SB)/4, $0x000000ff 1051 DATA masks<>+0x58(SB)/4, $0x00000000 1052 DATA masks<>+0x5c(SB)/4, $0x00000000 1053 1054 DATA masks<>+0x60(SB)/4, $0xffffffff 1055 DATA masks<>+0x64(SB)/4, $0x0000ffff 1056 DATA masks<>+0x68(SB)/4, $0x00000000 1057 DATA masks<>+0x6c(SB)/4, $0x00000000 1058 1059 DATA masks<>+0x70(SB)/4, $0xffffffff 1060 DATA masks<>+0x74(SB)/4, $0x00ffffff 1061 DATA masks<>+0x78(SB)/4, $0x00000000 1062 DATA masks<>+0x7c(SB)/4, $0x00000000 1063 1064 DATA masks<>+0x80(SB)/4, $0xffffffff 1065 DATA masks<>+0x84(SB)/4, $0xffffffff 1066 DATA masks<>+0x88(SB)/4, $0x00000000 1067 DATA masks<>+0x8c(SB)/4, $0x00000000 1068 1069 DATA masks<>+0x90(SB)/4, $0xffffffff 1070 DATA masks<>+0x94(SB)/4, $0xffffffff 1071 DATA masks<>+0x98(SB)/4, $0x000000ff 1072 DATA masks<>+0x9c(SB)/4, $0x00000000 1073 1074 DATA masks<>+0xa0(SB)/4, $0xffffffff 1075 DATA masks<>+0xa4(SB)/4, $0xffffffff 1076 DATA masks<>+0xa8(SB)/4, $0x0000ffff 1077 DATA masks<>+0xac(SB)/4, $0x00000000 1078 1079 DATA masks<>+0xb0(SB)/4, $0xffffffff 1080 DATA masks<>+0xb4(SB)/4, $0xffffffff 1081 DATA 
masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

// 16-byte entry at offset 0xc0: the first 12 bytes are 0xff, the last 4 are zero.
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

// 16-byte entry at offset 0xd0: the first 13 bytes are 0xff, the last 3 are zero.
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

// 16-byte entry at offset 0xe0: the first 14 bytes are 0xff, the last 2 are zero.
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

// 16-byte entry at offset 0xf0: the first 15 bytes are 0xff, the last one is zero.
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

// masks<> is a 256-byte read-only table (16 entries of 16 bytes each).
GLOBL masks<>(SB),RODATA,$256

// These are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// The index is how many bytes to move.
//
// Each entry is 16 selector bytes. In entry n (n = 1..15) the first n
// selectors are 16-n, ..., 15, i.e. they pick the top n source bytes; the
// remaining selectors are 0xff, whose set top bit makes pshufb store zero
// in that destination byte. Entry 0 is all zero selectors.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

// shifts<> is a 256-byte read-only table (16 entries of 16 bytes each).
GLOBL shifts<>(SB),RODATA,$256

// checkASM reports, in its one-byte result, whether both masks<> and
// shifts<> start on a 16-byte boundary.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX		// a low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)	// 1 iff the low 4 bits of both addresses are zero
	RET

// memeq loads a, b, size and the address of the result byte into the
// registers memeqbody expects (SI, DI, BX, AX) and tail-jumps to it.
TEXT runtime·memeq(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// Identical pointers return true without reading memory; otherwise the
// byte count is read from the closure and memeqbody does the comparison.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// Only s1's length is read. Identical data pointers return true without
// comparing any bytes.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	LEAL	v+16(FP), AX
	JMP	runtime·memeqbody(SB)
same:
	MOVB	$1, v+16(FP)
	RET

// bytes·Equal returns false immediately when the two lengths differ;
// otherwise it hands the data pointers and the common length to memeqbody.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// a in SI
// b in DI
// count in BX
// address of result byte in AX
//
// memeqbody stores 1 at (AX) if the count bytes at SI and DI are equal and
// 0 otherwise. The wrappers above enter it with JMP, so its RET returns to
// the wrapper's caller. It overwrites BX, CX, DX, SI, DI and the flags, and
// X0-X7 on the SSE2 path.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small		// fewer than 4 bytes: handled with one masked load per side

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0		// each byte becomes 0xff where equal, 0x00 where not
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0		// fold the four 16-byte results into X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX		// DX = 0xffff iff all 64 byte pairs matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end at the end of each buffer. These may
	// overlap bytes that were already compared; that is safe because this
	// path is only reached when the original count was at least 4.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

small:
	CMPL	BX, $0
	JEQ	equal		// count == 0: ZF is set, so SETEQ below stores 1

	// CX = -8*count. The shifts below use only the low 5 bits of CX,
	// i.e. 32 - 8*count, which is 8, 16 or 24 here.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc	// compares the low byte of the address
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// The wanted bytes are now the low count bytes of SI and DI. Subtract,
	// then shift the bytes above them out of the register: the result is
	// zero (ZF set) iff the wanted bytes are equal.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	(AX)
	RET

// cmpstring loads both string pointers and lengths plus the address of the
// result word into the registers cmpbody expects and tail-jumps to it.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare loads the pointer (offsets 0 and 12) and the length
// (offsets 4 and 16) of each argument plus the address of the result word
// into the registers cmpbody expects and tail-jumps to it.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte scans the s_len bytes at s for the byte c and returns
// the index of the first match, or -1 if there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward while CX != 0 and the byte at DI != AL
	// A match leaves ZF set: skip the two-instruction "not found" return.
	// If s_len is 0 no comparison runs and ZF is whatever it was on entry,
	// but then DI == SI and the found path also produces -1.
	JZ	3(PC)
	MOVL	$-1, ret+16(FP)
	RET
	SUBL	SI, DI
	SUBL	$1, DI		// DI stopped one past the matching byte
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte is the same scan as bytes·IndexByte; only the argument
// frame offsets differ (c at offset 8, result at offset 12).
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward while CX != 0 and the byte at DI != AL
	// A match leaves ZF set: skip the two-instruction "not found" return.
	// If s_len is 0 no comparison runs and ZF is whatever it was on entry,
	// but then DI == SI and the found path also produces -1.
	JZ	3(PC)
	MOVL	$-1, ret+12(FP)
	RET
	SUBL	SI, DI
	SUBL	$1, DI		// DI stopped one past the matching byte
	MOVL	DI, ret+12(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
//
// cmpbody compares the first min(alen, blen) bytes as unsigned bytes; if
// they are all equal the lengths decide the result. It is entered by JMP
// from the wrappers above, so its RET returns to the wrapper's caller.
// It overwrites BX, CX, DX, BP, SI, DI and the flags, and X0-X1 on the
// SSE2 path.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX // DX = blen-alen
	CMOVLGT	BX, BP // BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame		// same pointer: only the lengths can differ
	CMPL	BP, $4
	JB	small
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	mediumloop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX	// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX		// 1 if a's byte is the larger one (unsigned)
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// Compare the 4 bytes that end at the end of the common region. These
	// may overlap bytes already found equal; this path is only reached when
	// the common length was at least 4.
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX	// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, BX	// move a's bit to bottom
	ANDL	$1, BX	// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	// CX = -8*BP. The shifts below use only the low 5 bits of CX,
	// i.e. 32 - 8*BP, which is 8, 16 or 24 when BP is non-zero.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	allsame		// NEGL left ZF set: nothing in common to compare

	// load si
	CMPB	SI, $0xfc	// compares the low byte of the address
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI		// keep only the wanted bytes, moved to the top of the register

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	allsame
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX		// DX still holds blen-alen
	SETLT	BX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand1 advances the current m's fastrand word and returns the new
// value: x += x; if the doubled value has its top bit set, x ^= 0x88888eef.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX		// keep the un-XORed doubled value
	XORL	$0x88888eef, DX
	// The constant has its top bit set, so the XOR result is negative
	// exactly when the doubled value was not; in that case undo the XOR.
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is left in AX; CX is used as scratch for the TLS base.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP byte keeps the CALL off goexit's first byte, and the
// trailing NOP keeps the CALL's return address inside goexit.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Prefetching doesn't seem to help.
// Each prefetch stub below takes 4 bytes of arguments and returns
// immediately without issuing any prefetch instruction.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)		// tail.next = new moduledata
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
	RET