// Source: github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/runtime/asm_386.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the startup routine. In order, it:
//   - saves argc/argv on a 16-byte aligned scratch area,
//   - gives g0 provisional stack bounds (lo = SP-64KB+104, hi = SP),
//   - probes CPUID (vendor string, leaf 1, leaf 7) and requires MMX,
//   - sets up TLS, either through _cgo_init or runtime·ldt0setup,
//   - links g0 and m0 to each other,
//   - calls check, args, osinit, schedinit, newproc(mainPC) and mstart.
// mstart is not expected to return; the trailing INT $3 traps if it does.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)		// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL				// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)		// length of bad_proc_msg
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI			// SI = highest supported CPUID leaf, used below
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547		// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69		// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E		// "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, AX			// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX		// MMX
	JZ	bad_proc

	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	nocpuinfo
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	MOVL	BX, runtime·cpuid_ebx7(SB)

nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)		// BP still holds &runtime·g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0			// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0			// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// Message written to fd 2 by the bad_proc path of rt0_go (0x3d bytes,
// including the trailing newline).
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// mainPC holds the address of runtime·main; rt0_go pushes &mainPC as the
// entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a breakpoint trap (INT $3).
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL	$0x27F
	FLDCW	0(SP)
	POPL	AX
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX		// save state in g->sched
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX		// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX			// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)		// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX			// argument to fn: the g we switched away from
	MOVL	DI, DX			// DX = closure context for fn
	MOVL	0(DI), DI
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.
// We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the system stack. If the current g is already gsignal or g0,
// fn is called directly. If it is m->curg, the routine saves the g's state
// in g->sched, switches to g0's stack, calls fn, and switches back.
// Any other g is reported through runtime·badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI		// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX		// AX = g
	MOVL	g_m(AX), BX		// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX		// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX			// DX = closure context for fn
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI		// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX		// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX		// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX		// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX		// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003		// crash if newstack returns
	RET

// morestack_noctxt zeroes DX, which morestack stores into g->sched.ctxt,
// and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is at most MAXSIZE
// (unsigned compare); otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall selects the smallest callN whose frame can hold argsize bytes
// and jumps to it; sizes above 1073741824 go to runtime·badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines a wrapper with a MAXSIZE-byte frame. The wrapper copies
// argsize bytes from argptr onto its own stack, calls f, copies the bytes
// from retoffset onward back to argptr, and finally calls
// runtime·callwritebarrier(argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE in a loop, decrementing the cycles argument
// until it reaches zero.
// NOTE: the counter is decremented before it is tested, so cycles == 0
// would wrap around instead of returning immediately.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX		// fn
	MOVL	argp+4(FP), BX		// caller sp
	LEAL	-4(BX), SP		// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)		// return to CALL again
#else
	SUBL	$5, (SP)		// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX			// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved (pushed on entry, popped before returning).
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP		// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)		// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)		// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)		// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)			// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX			// saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds ctxt (the argument to
	// cgocallbackg), 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range (g->stack.lo, g->stack.hi):
// traps with INT $3 unless stack.lo < SP < stack.hi (unsigned compares).
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc(argp) returns the return PC stored just below argp.
// If that PC is runtime·stackBarrierPC, it returns the value produced by
// runtime·nextBarrierPC instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc(argp, pc) overwrites the return PC stored just below argp.
// If that slot currently holds runtime·stackBarrierPC, the new PC is handed
// to runtime·setNextBarrierPC instead and the slot is left untouched.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), DX
	CMPL	DX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp(argp) returns argp unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Returns the RDTSC counter. When cpuid_edx reports SSE2 (bit 26), the read
// is preceded by LFENCE if lfenceBeforeRdtsc is set, otherwise by MFENCE.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)		// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared with flags 0 rather than NOSPLIT,
// presumably so that calling it exercises the stack check.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort raises a breakpoint trap and never returns.
// The spin loop keeps execution from falling through into the next
// function if the trap is resumed (for example by a debugger).
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
loop:
	JMP	loop

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX		// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// aeshash and aeshashstr load the data pointer, length and result address
// into AX, BX and DX and jump to aeshashbody, which has no frame of its own
// and reads the seed from the caller's h+4(FP).
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX		// ptr to data
	MOVL	s+8(FP), BX		// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to string object
	MOVL	4(AX), BX		// length of string
	MOVL	(AX), AX		// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0			// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0			// 16 bits of length
	PSHUFHW	$0, X0, X0			// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length (unsigned compares).
	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX				// BX*2, scaled by 8 below: 16-byte table entries
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1				// scramble input, xor in seed
	AESENC	X1, X1				// scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h: three AESENC rounds keyed
// from runtime·aeskeysched, returning the low 32 bits of the result.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to data
	MOVL	h+4(FP), X0		// seed
	PINSRD	$1, (AX), X0		// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h: three AESENC rounds keyed
// from runtime·aeskeysched, returning the low 32 bits of the result.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX		// ptr to data
	MOVQ	(AX), X0		// data
	PINSRD	$2, h+4(FP), X0		// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// Entry i (the 16 bytes at offset i*16) has its low i bytes set to 0xff and
// the remaining bytes zero, for i = 0..15.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// These are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// The index is how many bytes to move: entry i (at offset i*16) selects
// source bytes 16-i..15 into destination bytes 0..i-1; the remaining
// selector bytes are 0xff.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports (in its 1-byte result) whether both lookup tables
// are 16-byte aligned.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX		// a low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
// Identical pointers compare equal without touching memory; otherwise the
// work is tail-called into memeqbody, which writes the result byte.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Same as memequal, but the size is read from the closure context in DX.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// Frame names follow the s1_base/s1_len/ret convention used by cmpstring.
// NOTE(review): confirm they match the Go declaration of eqstring.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1_len+4(FP), BX
	LEAL	ret+16(FP), AX
	JMP	runtime·memeqbody(SB)
same:
	MOVB	$1, ret+16(FP)
	RET

// bytes·Equal compares two byte slices. Slices of different length are
// unequal; otherwise the contents are compared by memeqbody.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody is the shared comparison loop. It is entered by JMP, so its
// RET returns to the caller of the function that jumped here.
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0		// X0 = AND of all four per-byte equality masks
	PMOVMSKB X0, DX
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set means all 64 bytes matched
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// The loads end exactly at the end of the data; they may re-read bytes
	// that were already compared. The total count was >= 4 on this path.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// fewer than 4 bytes in total
small:
	CMPL	BX, $0
	JEQ	equal		// ZF is set here, so SETEQ below stores 1

	LEAL	0(BX*8), CX
	NEGL	CX		// shift count; the low 5 bits are 32 - 8*count

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	SUBL	SI, DI
	SHLL	CX, DI		// discard the bytes beyond count; ZF set iff the rest match
equal:
	SETEQ	(AX)
	RET

// cmpstring loads the two strings' pointers and lengths and tail-calls
// cmpbody, which stores the three-way result in ret.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare loads the two slices' pointers and lengths and tail-calls
// cmpbody, which stores the three-way result in ret.
// NOTE(review): the s1/s2 names are kept from the original; confirm them
// against the Go declaration of bytes.Compare.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2_len+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte stores in ret the index of the first byte equal to c in
// the slice s, or -1 if there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	// With CX == 0 no comparison runs and the flags are left unchanged;
	// either branch then yields -1, because DI == SI on the found path.
	CLD; REPN; SCASB
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	SUBL	SI, DI
	SUBL	$1, DI		// DI was advanced one past the matching byte
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte stores in ret the index of the first byte equal to c in
// the string s, or -1 if there is none.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	// With CX == 0 no comparison runs and the flags are left unchanged;
	// either branch then yields -1, because DI == SI on the found path.
	CLD; REPN; SCASB
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	SUBL	SI, DI
	SUBL	$1, DI		// DI was advanced one past the matching byte
	MOVL	DI, ret+12(FP)
	RET

// cmpbody is the shared three-way comparison. It is entered by JMP, so its
// RET returns to the caller of the function that jumped here.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX		// DX = blen-alen
	JLE	have_min
	MOVL	BX, BP
have_min:
	// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame
	CMPL	BP, $4
	JB	small
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	mediumloop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16		// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX		// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

_0through4:
	// Loads end exactly at the end of the common prefix; they may re-read
	// bytes already found equal. The common length was >= 4 on this path.
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX		// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX		// find bit differences
	BSRL	CX, CX		// index of highest bit difference
	SHRL	CX, BX		// move a's bit to bottom
	ANDL	$1, BX		// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX
	NEGL	CX		// shift count; the low 5 bits are 32 - 8*BP
	JEQ	allsame		// nothing in common: only the lengths decide

	// load si
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI		// keep only the BP common bytes, in the high end

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI		// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI		// find bit differences
	JEQ	allsame
	BSRL	DI, CX		// index of highest bit difference
	SHRL	CX, SI		// move a's bit to bottom
	ANDL	$1, SI		// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX		// DX = blen-alen
	SETLT	BX		// 1 if alen > blen
	SETEQ	CX		// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand1 advances the per-m generator state m.fastrand and returns the
// new value: x += x; if the doubled value has its top bit set,
// x ^= 0x88888eef.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX		// keep the doubled value without the XOR applied
	XORL	$0x88888eef, DX
	// The constant has its top bit set, so the XOR result is non-negative
	// exactly when the doubled value had its top bit set: keep the XOR.
	JPL	store
	MOVL	BX, DX		// top bit was clear: drop the XOR
store:
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Prefetching doesn't seem to help.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = moduledata currently recorded as last
	MOVL	AX, moduledata_next(DX)		// link the new moduledata (in AX) after it
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new moduledata is now the last one
	RET