// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Listing origin: github.com/dannin/go@v0.0.0-20161031215817-d35dfd405eaa/src/runtime/asm_386.s

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the bootstrap sequence for the initial thread.
// In order, it:
//   - copies argc/argv to a 16-byte-aligned scratch area,
//   - gives g0 provisional stack bounds,
//   - probes CPUID and records feature words in runtime globals,
//   - sets up TLS, either through _cgo_init or runtime·ldt0setup,
//   - links g0 and m0 to each other,
//   - calls check, args, osinit and schedinit,
//   - passes runtime·mainPC to newproc and calls mstart.
// INT $3 follows the mstart call, so a return from mstart traps.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	// Two copies of EFLAGS are pushed: one is modified and reloaded,
	// the other stays on the stack for comparison and for the restore.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

	// Falls through to bad_proc when the ID bit could not be toggled.
bad_proc: // show that the program requires MMX.
	// Arguments to runtime·write: 2, the message address, and 0x3d,
	// which equals the GLOBL size of bad_proc_msg<> below.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI		// keep leaf-0 AX; compared against 7 before leaf 7 is queried
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, AX	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	nocpuinfo
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	MOVL	BX, runtime·cpuid_ebx7(SB)

nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS. if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	// _cgo_init receives g0 (still in BP) at 0(SP) and setg_gcc at 4(SP).
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	// Discard the two words pushed for newproc.
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// bad_proc_msg is the 0x3d-byte text written by the bad_proc path of rt0_go.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// mainPC is a 4-byte read-only word holding the address of runtime·main;
// rt0_go pushes the address of this word as the entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3 and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Stores the caller's SP and PC and the current g, zeroes ret, and calls
// runtime·badctxt if the ctxt field is not already zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// On exit AX holds gobuf.ret and DX holds gobuf.ctxt; the sp, ret and ctxt
// fields of the Gobuf are zeroed before jumping to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX		// reload gobuf after the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g that called mcall
	MOVL	DI, DX		// DX = fn value; the code pointer is its first word
	MOVL	0(DI), DI
	CALL	DI
	// Reached only if fn returns: jump to badmcall2.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.
// We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when the current g is m->gsignal or m->g0. When it is
// m->curg, saves the state in g->sched, switches to g0's stack, calls fn,
// and switches back. Any other g results in a call to badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = fn value; the code pointer is its first word
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP once it has been restored
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// DX is pushed unchanged as the ctxt argument to newstack, so it must
// hold the context value on entry (morestack_noctxt zeroes it first).
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	PUSHL	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPL	DX	// keep balance check happy
	RET

// morestack_noctxt zeroes DX (the ctxt argument) and jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// stackBarrier looks up the saved return PC of the current stack barrier
// entry, increments g.stkbarPos, and jumps to that PC.
TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the argument size) is <= MAXSIZE
// (unsigned compare); otherwise it falls through to the next statement.
#define DISPATCH(NAME,MAXSIZE) \
	CMPL	CX, $MAXSIZE; \
	JA	3(PC); \
	MOVL	$NAME(SB), AX; \
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call forwards to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and tail-jumps to the first callN
// whose N is >= argsize. Sizes above 1073741824 go to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call function NAME with a MAXSIZE-byte
// frame. It copies argsize bytes from argptr to the frame, calls the code
// pointer stored in the first word of f, and then hands the region starting
// at retoffset to callRet<> with argtype in DX, destination in DI, source
// in SI and remaining byte count in CX.
#define CALLFN(NAME,MAXSIZE) \
TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
	NO_LOCAL_POINTERS; \
	/* copy arguments to stack */ \
	MOVL	argptr+8(FP), SI; \
	MOVL	argsize+12(FP), CX; \
	MOVL	SP, DI; \
	REP;MOVSB; \
	/* call function */ \
	MOVL	f+4(FP), DX; \
	MOVL	(DX), AX; \
	PCDATA	$PCDATA_StackMapIndex, $0; \
	CALL	AX; \
	/* copy return values back */ \
	MOVL	argtype+0(FP), DX; \
	MOVL	argptr+8(FP), DI; \
	MOVL	argsize+12(FP), CX; \
	MOVL	retoffset+16(FP), BX; \
	MOVL	SP, SI; \
	ADDL	BX, DI; \
	ADDL	BX, SI; \
	SUBL	BX, CX; \
	CALL	callRet<>(SB); \
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
// Register inputs, stored to the outgoing argument slots in this order:
// DX -> 0(SP), DI -> 4(SP), SI -> 8(SP), CX -> 12(SP).
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// Instantiate one call function per frame size, 16 bytes through 1073741824
// bytes in powers of two.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// The counter is decremented before it is tested, so a count of 0 would
// wrap around instead of returning immediately.
// NOTE(review): the frame is declared $0-0 although cycles+0(FP) is read;
// confirm the argument size against the Go declaration.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// publicationBarrier contains no instructions other than RET.
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3.
//    jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are pushed on entry and popped before returning, so both are
// preserved. Calls runtime·badctxt if g->sched.ctxt is not zero.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX		// DX = SP on entry; used below to compute the stack depth

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI	// SI = stack.hi - saved depth = the original SP
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX	// DX = the word stored as 0 above; read back as oldm
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// On Windows, 0x14(FS) is additionally set to the address of g->m->tls,
// or to 0 when the argument is nil (in which case g itself is not stored).
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
// Stores its argument into the g slot of the current TLS block.
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Executes INT $3 when a bound check fails.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the word stored just below argp. When that word
// equals runtime·stackBarrierPC, the result of runtime·nextBarrierPC is
// returned instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the word just below argp with pc. When that word
// currently equals runtime·stackBarrierPC, pc is passed to
// runtime·setNextBarrierPC instead and the word is left untouched.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), DX
	CMPL	DX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// func cputicks() int64
// Returns the RDTSC value (DX:AX). A fence is issued first when bit
// 0x4000000 of cpuid_edx is set: LFENCE if lfenceBeforeRdtsc is 1,
// MFENCE otherwise. The fences are emitted as raw bytes.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

// ldt0setup calls runtime·setldt with (7, &m0.tls, 32).
TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT (flags 0);
// rt0_go calls it with the comment "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size: second word of the closure addressed by DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX	// memhash result
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// Loads the data pointer, size and result address into AX, BX and DX and
// jumps to aeshashbody, which reads the seed from h+4(FP) of this frame.
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

// aeshashstr is the string variant: p points at a string header, from
// which the data pointer and length are loaded before jumping to aeshashbody.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
// Reached by JMP from aeshash and aeshashstr, so h+4(FP) refers to the
// seed argument of those functions. Dispatches on length:
// 0, 1-15, 16, 17-32, 33-64, and 65 or more bytes.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX		// BX = 2*length, so BX*8 indexes 16-byte table entries
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX		// BX = 2*length, so BX*8 indexes 16-byte table entries
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// (the first 16 and the last 16 bytes; they overlap when length < 32)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 and last 32 bytes of the input
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p together with the seed h and stores
// the low 32 bits of the result in ret.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p together with the seed h and stores
// the low 32 bits of the result in ret.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> holds sixteen 16-byte entries. Entry n (at byte offset n*16)
// has its low n bytes set to 0xff and every other byte zero.

// n = 0
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

// n = 1
DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

// n = 2
DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

// n = 3
DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

// n = 4
DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

// n = 5
DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

// n = 6
DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

// n = 7
DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

// n = 8
DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

// n = 9
DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

// n = 10
DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

// n = 11
DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

// n = 12
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

// n = 13
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

// n = 14
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

// n = 15
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// shifts<> holds PSHUFB control vectors that move data down from the
// high bytes of a register to its low bytes. Entry n (at byte offset
// n*16) selects source bytes 16-n..15 into destination bytes 0..n-1;
// the 0xff selectors zero the remaining destination bytes.
// Entry 0 is all zero.

// n = 0
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

// n = 1
DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

// n = 2
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

// n = 3
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

// n = 4
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

// n = 5
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

// n = 6
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

// n = 7
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

// n = 8
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

// n = 9
DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

// n = 10
DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

// n = 11
DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

// n = 12
DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

// n = 13
DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

// n = 14
DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

// n = 15
DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports whether both masks<>(SB) and shifts<>(SB) start on
// a 16-byte boundary.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), BX
	MOVL	$masks<>(SB), AX
	ORL	BX, AX			// low bits of either address survive the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)		// true iff the low four bits of both are zero
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
// Identical pointers compare equal without touching memory; anything
// else is handed to memeqbody.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	CMPL	SI, DI
	JEQ	sameptr
	LEAL	ret+12(FP), AX		// memeqbody writes the result through AX
	MOVL	size+8(FP), BX
	JMP	runtime·memeqbody(SB)
sameptr:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Same as memequal, except that the size is read from the closure
// context in DX.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	CMPL	SI, DI
	JEQ	sameptr
	LEAL	ret+8(FP), AX		// memeqbody writes the result through AX
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	JMP	runtime·memeqbody(SB)
sameptr:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length, so only s1's
// length is consulted.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s2_base+8(FP), DI
	MOVL	s1_base+0(FP), SI
	CMPL	SI, DI
	JEQ	samebase
	LEAL	ret+16(FP), AX		// memeqbody writes the result through AX
	MOVL	s1_len+4(FP), BX
	JMP	runtime·memeqbody(SB)
samebase:
	MOVB	$1, ret+16(FP)
	RET

// bytes·Equal returns false right away when the two lengths differ and
// otherwise lets memeqbody compare the contents.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	b_len+16(FP), CX
	MOVL	a_len+4(FP), BX
	CMPL	BX, CX
	JNE	lendiffer
	LEAL	ret+24(FP), AX		// memeqbody writes the result through AX
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	JMP	runtime·memeqbody(SB)
lendiffer:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody is the shared comparison loop behind the equality entry
// points above, which reach it with JMP.
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	short

	// 64 bytes at a time using xmm registers
loop64:
	CMPL	BX, $64
	JB	loop4
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	loop4
	MOVOU	(SI), X0
	MOVOU	16(SI), X2
	MOVOU	32(SI), X4
	MOVOU	48(SI), X6
	MOVOU	(DI), X1
	MOVOU	16(DI), X3
	MOVOU	32(DI), X5
	MOVOU	48(DI), X7
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0			// fold the four compare results into X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	CMPL	DX, $0xffff		// all 16 mask bits set <=> all 64 bytes matched
	JEQ	loop64
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
loop4:
	CMPL	BX, $4
	JBE	tail
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	loop4
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes: compare the 4 bytes that end at the end of
	// the data (may overlap bytes that were already compared)
tail:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// fewer than 4 bytes in total
short:
	CMPL	BX, $0
	JEQ	done			// ZF stays set, so done stores true

	LEAL	0(BX*8), CX
	NEGL	CX			// CX = -8*count; shifts use it mod 32

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	a_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	a_ready
a_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
a_ready:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	b_high
	MOVL	(DI), DI
	JMP	b_ready
b_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
b_ready:

	SUBL	SI, DI
	SHLL	CX, DI			// discard bytes beyond count; ZF = wanted bytes equal
done:
	SETEQ	(AX)
	RET

// cmpstring loads both strings' base and length and tail-jumps to
// cmpbody, which writes the result to ret.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare loads the first two words of each slice argument
// (offsets 0/4 and 12/16) into the registers cmpbody expects and
// tail-jumps to it.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte scans s for c with REPN SCASB and returns the index
// of the first match, or -1.
// When s_len is 0 no comparison runs and ZF is whatever it was on
// entry; both exits produce -1 then, because DI still equals SI.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVB	c+12(FP), AL
	MOVL	s_len+4(FP), CX
	MOVL	s+0(FP), SI
	MOVL	SI, DI
	CLD; REPN; SCASB
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	SUBL	SI, DI			// DI stopped one past the matching byte
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte is the string counterpart of bytes·IndexByte; only
// the argument offsets differ.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVB	c+8(FP), AL
	MOVL	s_len+4(FP), CX
	MOVL	s+0(FP), SI
	MOVL	SI, DI
	CLD; REPN; SCASB
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	SUBL	SI, DI			// DI stopped one past the matching byte
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// cmpbody is the shared three-way comparison behind cmpstring and
// bytes·Compare.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX			// DX = blen-alen
	JLE	havemin			// blen <= alen: BP already holds blen
	MOVL	BX, BP
havemin:
	// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame
	CMPL	BP, $4
	JB	short
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	loop4
loop16:
	CMPL	BP, $16
	JB	loop4
	MOVOU	(DI), X1
	MOVOU	(SI), X0
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX		// convert EQ to NE
	JNE	diff16			// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	loop16

diff16:
	XORL	DX, DX
	BSFL	BX, BX			// index of first byte that differs
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX		// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

loop4:
	CMPL	BP, $4
	JBE	tail4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	loop4

	// compare the 4 bytes that end at the end of the common prefix
tail4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX			// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX			// find bit differences
	BSRL	CX, CX			// index of highest bit difference
	SHRL	CX, BX			// move a's bit to bottom
	ANDL	$1, BX			// mask bit
	LEAL	-1(BX*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
short:
	LEAL	(BP*8), CX
	NEGL	CX			// CX = -8*BP; ZF set when BP is 0
	JEQ	allsame

	// load si
	CMPB	SI, $0xfc
	JA	a_high
	MOVL	(SI), SI
	JMP	a_ready
a_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
a_ready:
	SHLL	CX, SI

	// same for di
	CMPB	DI, $0xfc
	JA	b_high
	MOVL	(DI), DI
	JMP	b_ready
b_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
b_ready:
	SHLL	CX, DI

	BSWAPL	SI			// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI			// find bit differences
	JEQ	allsame
	BSRL	DI, CX			// index of highest bit difference
	SHRL	CX, SI			// move a's bit to bottom
	ANDL	$1, SI			// mask bit
	LEAL	-1(SI*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	CX, CX
	XORL	BX, BX
	TESTL	DX, DX			// DX = blen-alen
	SETLT	BX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand advances the generator state kept in m.fastrand and returns
// the new state. The state is doubled and XORed with 0x88888eef; if the
// XORed value has its sign bit set, the plain doubled value is used.
TEXT runtime·fastrand(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX		// AX = g.m
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX			// BX = doubled state
	XORL	$0x88888eef, DX
	JPL	store			// sign bit clear: keep the XORed value
	MOVL	BX, DX
store:
	MOVL	DX, ret+0(FP)
	MOVL	DX, m_fastrand(AX)
	RET

// return0 leaves 0 in AX and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX	// result is returned in AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Prefetching doesn't seem to help, so the four prefetch entry points
// below just return.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)		// tail.next = AX
	MOVL	AX, runtime·lastmoduledatap(SB)	// AX becomes the new tail
	RET

// uint32tofloat64 converts a to float64. The value is widened to a
// 64-bit integer on the stack (high word zero), loaded onto the x87
// stack as an integer, and stored to ret as a double.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	$0, 4(SP)		// high 32 bits = 0
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)		// low 32 bits = a
	FMOVV	0(SP), F0
	FMOVDP	F0, ret+4(FP)
	RET

// float64touint32 converts a to an integer and returns the low 32 bits.
// The x87 control word is saved at 0(SP), replaced by
// runtime·controlWord64trunc for the 64-bit integer store into
// 4(SP)..11(SP), and then restored.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)			// save caller's control word
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)		// store as 64-bit integer and pop
	FLDCW	0(SP)			// restore control word
	MOVL	4(SP), AX		// low 32 bits of the integer
	MOVL	AX, ret+8(FP)
	RET