// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Provenance: github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/runtime/asm_386.s

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the 386 bootstrap routine. In order, it:
//   - saves argc/argv in a 16-byte aligned scratch area,
//   - gives g0 provisional stack bounds,
//   - probes CPUID, records the feature words and refuses to run without MMX,
//   - lets _cgo_init set up TLS if present, otherwise calls ldt0setup
//     and verifies that a store through TLS lands in m0.tls,
//   - links g0 and m0 together,
//   - runs check, args, osinit, schedinit, queues runtime·main via newproc
//     and finally enters mstart.
// BP holds &runtime·g0 and SI holds the highest CPUID leaf across the
// CPU-detection part of the function.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	// The ID bit (bit 21) of EFLAGS can only be toggled when CPUID exists.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP) // flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	// write(2, bad_proc_msg, 0x3d); exit(1)
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI		// SI = highest supported CPUID leaf
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·cpuid_eax(SB)
	MOVL	DI, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX // MMX
	JZ	bad_proc

	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	nocpuinfo
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	MOVL	BX, runtime·cpuid_ebx7(SB)

nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS. if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// Message written to fd 2 by rt0_go's bad_proc path.
// 0x3c bytes of text plus a trailing newline = 0x3d bytes.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// mainPC is a read-only word holding the address of runtime·main;
// rt0_go pushes its address as the entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a debugger trap (INT 3).
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP, the caller's PC and the current g in buf,
// and zeroes buf.ret. Calls badctxt if buf.ctxt is non-zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// On exit AX = buf.ret and DX = buf.ctxt; buf.sp, buf.ret and buf.ctxt
// are cleared before jumping to buf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX		// reload: BX is not preserved across the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g we switched away from
	MOVL	DI, DX		// DX = closure pointer for fn
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX	// fn returned: report it
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.
// We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the m's g0 stack.
//   - If the current g is gsignal or g0, fn is called directly.
//   - If the current g is m->curg, the g's SP is saved in g->sched,
//     execution moves to g0's saved SP, fn runs, and the routine
//     switches back to m->curg.
//   - Any other g is reported through badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = closure pointer
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// On entry DX carries the context value that is pushed as the
// argument for newstack. The routine records f's caller in m->morebuf
// and f itself in g->sched, switches to m->g0's stack and calls
// newstack.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	PUSHL	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPL	DX	// keep balance check happy
	RET

// morestack_noctxt is the entry point for callers that have no
// context value: it zeroes DX and tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is at most
// MAXSIZE (unsigned compare); otherwise it falls through to the
// next DISPATCH. Clobbers AX.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias that tail-jumps to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// ·reflectcall selects the smallest runtime·callN whose frame holds
// argsize bytes and jumps to it; sizes above 1073741824 go to
// badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper named NAME with a
// MAXSIZE-byte frame. It copies argsize bytes from argptr to the
// frame, calls the code pointer stored in the closure f (DX = f),
// then hands the result region that starts at retoffset to callRet
// in registers:
//   DX = argtype, DI = argptr+retoffset,
//   SI = SP+retoffset, CX = argsize-retoffset.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// The counter is decremented after each PAUSE, so a cycles value of 0
// wraps around instead of returning immediately.
// NOTE(review): the TEXT directive declares $0-0 although the body reads
// a 4-byte argument at cycles+0(FP) - confirm whether $0-4 is intended.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2.
//    sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// Preserves AX and BX (pushed on entry, popped on exit).
// Calls badctxt if g->sched.ctxt is non-zero.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
// The value fn leaves in AX is stored as the result.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX		// DX = SP on entry, used below to compute stack depth

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI	// SI = stack.hi - saved depth = original SP
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
// DX holds the m that was current on entry (0 if needm had to be
// called); it is parked at 4(SP) on the curg stack across the call to
// cgocallbackg and decides at the end whether dropm is called.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// On Windows the slot at 0x14(FS) is also updated: it is cleared when
// gg is nil, otherwise pointed at gg->m->tls.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
// Stores the gg argument into the TLS g slot.
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Traps with INT 3 when either unsigned comparison fails.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc treats its argument as the address of the caller's first
// argument and returns the word stored just below that address.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Returns the RDTSC counter (low word in AX, high word in DX).
// When cpuid_edx has bit 0x4000000 set, a fence is issued first:
// LFENCE if lfenceBeforeRdtsc is 1, MFENCE otherwise.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

// ldt0setup calls setldt(7, &m0.tls, 32).
TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint. setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT (flags 0).
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
// DX is the closure pointer on entry; the size is read from 4(DX).
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// Loads the register arguments and tail-jumps to aeshashbody.
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

// aeshashstr hashes the string whose header p points at.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
// Reached by JMP from aeshash and aeshashstr, so h+4(FP) still names
// the seed argument of that caller. Dispatches on length:
// 0, 1-15, 16, 17-32, 33-64 and 65+ bytes.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX			// BX*2, scaled by 8 below = 16 bytes per table entry
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h, using three AESENC
// rounds keyed from aeskeysched, and returns the low 32 bits.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h, using three AESENC
// rounds keyed from aeskeysched, and returns the low 32 bits.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> is a table of 16 entries, 16 bytes each. Entry i (at byte
// offset i*16) has its low i bytes set to 0xff and the remaining
// bytes set to zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// (A 0xff selector byte has its top bit set, which makes pshufb
// write zero into that destination byte.)
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM stores 1 in its one-byte result when both tables above
// start on a 16-byte boundary, and 0 otherwise.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// OR the two addresses together: any low bit set in either one
	// survives, so a single test of the low four bits covers both.
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
// Identical pointers are reported equal without touching memory;
// otherwise the comparison is handed to memeqbody.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	same_ptr
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)
same_ptr:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Same as memequal, except that the size is read through DX.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	same_ptr
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
same_ptr:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	CMPL	SI, DI
	JEQ	same_ptr
	// Only s1's length is read; see the guarantee above.
	MOVL	s1_len+4(FP), BX
	LEAL	ret+16(FP), AX
	JMP	runtime·memeqbody(SB)
same_ptr:
	MOVB	$1, ret+16(FP)
	RET

// bytes·Equal stores 0 in its result when the two lengths differ and
// otherwise lets memeqbody compare the contents.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	len_differs
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
len_differs:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody is the shared comparison loop. It is entered by JMP from
// the functions above, so its RET returns to their caller.
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	short

	// 64 bytes at a time using xmm registers
loop64:
	CMPL	BX, $64
	JB	loop4
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four per-byte results so one mask covers all 64 bytes.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set: every byte matched
	JEQ	loop64
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
loop4:
	CMPL	BX, $4
	JBE	tail
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	loop4
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
tail:
	// Compare the 4 bytes that end at the end of each buffer. When
	// BX < 4 this window overlaps bytes that already compared equal.
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

short:
	// Fewer than 4 bytes in total. For BX == 0 the CMPL leaves ZF set,
	// so the SETEQ at done stores 1.
	CMPL	BX, $0
	JEQ	done

	// CX = -8*BX. Shifts use only the low 5 bits of the count, so
	// shifting by CX moves by 32-8*BX bits.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_done
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_done:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_done
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_done:

	// Shift the bytes beyond the first BX out of the difference;
	// ZF then reflects only the bytes being compared.
	SUBL	SI, DI
	SHLL	CX, DI
done:
	SETEQ	(AX)
	RET

// cmpstring loads both string headers into the registers cmpbody
// expects and jumps there; cmpbody writes the result word.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare does the same for two byte slices: pointer and length
// of the first at offsets 0 and 4, of the second at offsets 12 and 16.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte scans s_len bytes starting at s for the byte c and
// stores the index of the first match, or -1 when there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB
	// With s_len == 0 nothing is compared and ZF is left over from
	// earlier code; DI still equals SI, so both paths store -1.
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	// DI stopped one byte past the match.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte is the string variant of bytes·IndexByte; only the
// frame offsets of c and the result differ.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB
	// The s_len == 0 case stores -1 on either path, as DI == SI.
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	// DI stopped one byte past the match.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// cmpbody is the shared three-way comparison, entered by JMP from
// cmpstring and bytes·Compare.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX	// DX = blen-alen
	JLE	have_min	// blen <= alen: BP already holds blen
	MOVL	BX, BP	// BP = min(alen, blen)
have_min:
	CMPL	SI, DI
	JEQ	allsame
	CMPL	BP, $4
	JB	short
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	loop4
loop16:
	CMPL	BP, $16
	JB	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	loop16

diff16:
	BSFL	BX, BX	// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

loop4:
	CMPL	BP, $4
	JBE	tail4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	loop4

tail4:
	// Compare the 4 bytes ending at the end of the common prefix;
	// they may overlap bytes that already compared equal.
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX	// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, BX	// move a's bit to bottom
	ANDL	$1, BX	// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
short:
	// CX = -8*BP; NEGL leaves ZF set when BP == 0, i.e. when there
	// are no common bytes to look at.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	allsame

	// load si
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_done
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_done:
	SHLL	CX, SI

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_done
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_done:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	allsame
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX	// DX is still blen-alen
	SETLT	BX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// return0 returns with AX set to 0.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Prefetching doesn't seem to help.
// The four prefetch entry points below are therefore plain returns.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	// Append the moduledata in AX after the current tail of the list,
	// then make it the new tail.
	MOVL	runtime·lastmoduledatap(SB), DX
	MOVL	AX, moduledata_next(DX)
	MOVL	AX, runtime·lastmoduledatap(SB)
	RET

// uint32tofloat64 converts the 32-bit argument a to a float64 result
// by going through a 64-bit integer on the x87 stack.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	// Build a 64-bit integer in the frame whose high word is zero, so
	// the value is loaded as non-negative whatever its top bit is.
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	FMOVV	0(SP), F0	// load the 64-bit integer
	FMOVDP	F0, ret+4(FP)	// store as float64 and pop
	RET

// float64touint32 converts the float64 argument a to a 32-bit result.
// The value is stored as a 64-bit integer and the low word returned.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)	// save the current x87 control word
	// NOTE(review): controlWord64trunc is defined elsewhere; from its
	// name it presumably selects truncating rounding -- confirm.
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)	// store as 64-bit integer and pop
	FLDCW	0(SP)	// restore the saved control word
	MOVL	4(SP), AX	// low 32 bits of the stored integer
	MOVL	AX, ret+8(FP)
	RET